From f5e8be9dbb68bd471d4c3dfc68c6a544486a9835 Mon Sep 17 00:00:00 2001 From: Tim Davis Date: Sat, 19 Oct 2019 14:29:30 -0500 Subject: [PATCH] v5.1.2 --- .gitignore | 222 - AMD/Doc/Makefile | 4 +- CAMD/Doc/Makefile | 4 +- CSparse/Lib/Makefile | 2 +- ChangeLog | 13 + GraphBLAS/.gitignore | 1 + GraphBLAS/CMakeLists.txt | 60 +- GraphBLAS/Demo/Include/demos.h | 6 +- GraphBLAS/Demo/MATLAB/tri_matlab.m | 106 + GraphBLAS/Demo/MATLAB/tri_matlab_out.txt | 198 + GraphBLAS/Demo/Output/bfs_demo.out | 234 +- GraphBLAS/Demo/Output/go3_out_laptop.txt | 190 + GraphBLAS/Demo/Output/go_out_cholesky.txt | 2662 +++ GraphBLAS/Demo/Output/go_out_laptop.txt | 2660 +++ GraphBLAS/Demo/Output/mis_demo.out | 116 +- GraphBLAS/Demo/Output/simple_demo.out | 6 +- GraphBLAS/Demo/Output/tri_demo.out | 720 +- GraphBLAS/Demo/Program/bfs_demo.c | 4 +- GraphBLAS/Demo/Program/mis_demo.c | 2 +- GraphBLAS/Demo/Program/tri_demo.c | 88 +- GraphBLAS/Demo/Program/wildtype_demo.c | 1 + GraphBLAS/Demo/README.txt | 15 +- GraphBLAS/Demo/Source/get_matrix.c | 6 +- GraphBLAS/Demo/Source/read_matrix.c | 98 +- GraphBLAS/Demo/demo | 4 +- GraphBLAS/Demo/go3 | 7 + GraphBLAS/Demo/t1 | 3 - GraphBLAS/Demo/tdemo | 24 - GraphBLAS/Doc/ChangeLog | 20 +- GraphBLAS/Doc/GraphBLAS_UserGuide.pdf | Bin 618731 -> 621314 bytes GraphBLAS/Doc/GraphBLAS_UserGuide.tex | 385 +- GraphBLAS/Doc/Makefile | 11 + GraphBLAS/Include/GraphBLAS.h | 110 +- GraphBLAS/Makefile | 43 +- GraphBLAS/README.txt | 13 +- GraphBLAS/Source/GB.h | 173 +- GraphBLAS/Source/GB_AxB_builtin.c | 167 +- GraphBLAS/Source/GB_AxB_methods.h | 213 + GraphBLAS/Source/GB_AxB_numeric.c | 95 +- GraphBLAS/Source/GB_AxB_symbolic.c | 7 +- GraphBLAS/Source/GB_Flag_alloc.c | 5 +- GraphBLAS/Source/GB_Flag_free.c | 3 +- GraphBLAS/Source/GB_Mark_alloc.c | 5 +- GraphBLAS/Source/GB_Mark_free.c | 3 +- GraphBLAS/Source/GB_Matrix_AdotB.c | 584 +- GraphBLAS/Source/GB_Matrix_alloc.c | 4 +- GraphBLAS/Source/GB_Matrix_dup.c | 21 +- GraphBLAS/Source/GB_Matrix_free.c | 4 +- GraphBLAS/Source/GB_Matrix_ixfree.c | 4 +- GraphBLAS/Source/GB_Matrix_multiply.c | 35 +- GraphBLAS/Source/GB_Matrix_realloc.c | 2 +- GraphBLAS/Source/GB_Matrix_transplant.c | 15 +- GraphBLAS/Source/GB_Monoid_new.c | 2 +- GraphBLAS/Source/GB_Work_free.c | 3 +- GraphBLAS/Source/GB_accum_mask.c | 1 + GraphBLAS/Source/GB_add_pending.c | 2 +- GraphBLAS/Source/GB_assign.c | 242 +- GraphBLAS/Source/GB_build.c | 20 +- GraphBLAS/Source/GB_build_factory.c | 13 +- GraphBLAS/Source/GB_builder.c | 9 +- GraphBLAS/Source/GB_calloc_memory.c | 38 +- GraphBLAS/Source/GB_free_memory.c | 32 +- GraphBLAS/Source/GB_free_pending.c | 6 +- GraphBLAS/Source/GB_malloc_memory.c | 43 +- GraphBLAS/Source/GB_mxm.c | 14 +- GraphBLAS/Source/GB_new.c | 2 +- GraphBLAS/Source/GB_object_check.c | 2 +- GraphBLAS/Source/GB_queue_check.c | 2 +- GraphBLAS/Source/GB_queue_init.c | 49 - GraphBLAS/Source/GB_queue_insert.c | 23 +- GraphBLAS/Source/GB_queue_remove.c | 36 +- GraphBLAS/Source/GB_queue_remove_head.c | 25 +- GraphBLAS/Source/GB_realloc_memory.c | 68 +- GraphBLAS/Source/GB_reduce_to_column.c | 2 + GraphBLAS/Source/GB_reduce_to_scalar.c | 117 +- GraphBLAS/Source/GB_semiring_builtin.c | 12 +- GraphBLAS/Source/GB_subassign.c | 67 +- GraphBLAS/Source/GB_subassign_kernel.c | 15 +- GraphBLAS/Source/GB_transpose_ix.c | 2 +- GraphBLAS/Source/GB_transpose_op.c | 2 +- GraphBLAS/Source/GB_transpose_pattern.c | 2 +- GraphBLAS/Source/GB_wait.c | 5 +- .../Source/Generated/GB_AxB__eq_eq_bool.c | 513 + .../Source/Generated/GB_AxB__eq_eq_fp32.c | 513 + .../Source/Generated/GB_AxB__eq_eq_fp64.c | 513 + 
.../Source/Generated/GB_AxB__eq_eq_int16.c | 513 + .../Source/Generated/GB_AxB__eq_eq_int32.c | 513 + .../Source/Generated/GB_AxB__eq_eq_int64.c | 513 + .../Source/Generated/GB_AxB__eq_eq_int8.c | 513 + .../Source/Generated/GB_AxB__eq_eq_uint16.c | 513 + .../Source/Generated/GB_AxB__eq_eq_uint32.c | 513 + .../Source/Generated/GB_AxB__eq_eq_uint64.c | 513 + .../Source/Generated/GB_AxB__eq_eq_uint8.c | 513 + .../Source/Generated/GB_AxB__eq_first_bool.c | 513 + .../Source/Generated/GB_AxB__eq_ge_bool.c | 513 + .../Source/Generated/GB_AxB__eq_ge_fp32.c | 513 + .../Source/Generated/GB_AxB__eq_ge_fp64.c | 513 + .../Source/Generated/GB_AxB__eq_ge_int16.c | 513 + .../Source/Generated/GB_AxB__eq_ge_int32.c | 513 + .../Source/Generated/GB_AxB__eq_ge_int64.c | 513 + .../Source/Generated/GB_AxB__eq_ge_int8.c | 513 + .../Source/Generated/GB_AxB__eq_ge_uint16.c | 513 + .../Source/Generated/GB_AxB__eq_ge_uint32.c | 513 + .../Source/Generated/GB_AxB__eq_ge_uint64.c | 513 + .../Source/Generated/GB_AxB__eq_ge_uint8.c | 513 + .../Source/Generated/GB_AxB__eq_gt_bool.c | 513 + .../Source/Generated/GB_AxB__eq_gt_fp32.c | 513 + .../Source/Generated/GB_AxB__eq_gt_fp64.c | 513 + .../Source/Generated/GB_AxB__eq_gt_int16.c | 513 + .../Source/Generated/GB_AxB__eq_gt_int32.c | 513 + .../Source/Generated/GB_AxB__eq_gt_int64.c | 513 + .../Source/Generated/GB_AxB__eq_gt_int8.c | 513 + .../Source/Generated/GB_AxB__eq_gt_uint16.c | 513 + .../Source/Generated/GB_AxB__eq_gt_uint32.c | 513 + .../Source/Generated/GB_AxB__eq_gt_uint64.c | 513 + .../Source/Generated/GB_AxB__eq_gt_uint8.c | 513 + .../Source/Generated/GB_AxB__eq_land_bool.c | 513 + .../Source/Generated/GB_AxB__eq_le_bool.c | 513 + .../Source/Generated/GB_AxB__eq_le_fp32.c | 513 + .../Source/Generated/GB_AxB__eq_le_fp64.c | 513 + .../Source/Generated/GB_AxB__eq_le_int16.c | 513 + .../Source/Generated/GB_AxB__eq_le_int32.c | 513 + .../Source/Generated/GB_AxB__eq_le_int64.c | 513 + .../Source/Generated/GB_AxB__eq_le_int8.c | 513 + .../Source/Generated/GB_AxB__eq_le_uint16.c | 513 + .../Source/Generated/GB_AxB__eq_le_uint32.c | 513 + .../Source/Generated/GB_AxB__eq_le_uint64.c | 513 + .../Source/Generated/GB_AxB__eq_le_uint8.c | 513 + .../Source/Generated/GB_AxB__eq_lor_bool.c | 513 + .../Source/Generated/GB_AxB__eq_lt_bool.c | 513 + .../Source/Generated/GB_AxB__eq_lt_fp32.c | 513 + .../Source/Generated/GB_AxB__eq_lt_fp64.c | 513 + .../Source/Generated/GB_AxB__eq_lt_int16.c | 513 + .../Source/Generated/GB_AxB__eq_lt_int32.c | 513 + .../Source/Generated/GB_AxB__eq_lt_int64.c | 513 + .../Source/Generated/GB_AxB__eq_lt_int8.c | 513 + .../Source/Generated/GB_AxB__eq_lt_uint16.c | 513 + .../Source/Generated/GB_AxB__eq_lt_uint32.c | 513 + .../Source/Generated/GB_AxB__eq_lt_uint64.c | 513 + .../Source/Generated/GB_AxB__eq_lt_uint8.c | 513 + .../Source/Generated/GB_AxB__eq_lxor_bool.c | 513 + .../Source/Generated/GB_AxB__eq_ne_fp32.c | 513 + .../Source/Generated/GB_AxB__eq_ne_fp64.c | 513 + .../Source/Generated/GB_AxB__eq_ne_int16.c | 513 + .../Source/Generated/GB_AxB__eq_ne_int32.c | 513 + .../Source/Generated/GB_AxB__eq_ne_int64.c | 513 + .../Source/Generated/GB_AxB__eq_ne_int8.c | 513 + .../Source/Generated/GB_AxB__eq_ne_uint16.c | 513 + .../Source/Generated/GB_AxB__eq_ne_uint32.c | 513 + .../Source/Generated/GB_AxB__eq_ne_uint64.c | 513 + .../Source/Generated/GB_AxB__eq_ne_uint8.c | 513 + .../Source/Generated/GB_AxB__eq_second_bool.c | 513 + .../Source/Generated/GB_AxB__land_eq_bool.c | 513 + .../Source/Generated/GB_AxB__land_eq_fp32.c | 513 + 
.../Source/Generated/GB_AxB__land_eq_fp64.c | 513 + .../Source/Generated/GB_AxB__land_eq_int16.c | 513 + .../Source/Generated/GB_AxB__land_eq_int32.c | 513 + .../Source/Generated/GB_AxB__land_eq_int64.c | 513 + .../Source/Generated/GB_AxB__land_eq_int8.c | 513 + .../Source/Generated/GB_AxB__land_eq_uint16.c | 513 + .../Source/Generated/GB_AxB__land_eq_uint32.c | 513 + .../Source/Generated/GB_AxB__land_eq_uint64.c | 513 + .../Source/Generated/GB_AxB__land_eq_uint8.c | 513 + .../Generated/GB_AxB__land_first_bool.c | 513 + .../Source/Generated/GB_AxB__land_ge_bool.c | 513 + .../Source/Generated/GB_AxB__land_ge_fp32.c | 513 + .../Source/Generated/GB_AxB__land_ge_fp64.c | 513 + .../Source/Generated/GB_AxB__land_ge_int16.c | 513 + .../Source/Generated/GB_AxB__land_ge_int32.c | 513 + .../Source/Generated/GB_AxB__land_ge_int64.c | 513 + .../Source/Generated/GB_AxB__land_ge_int8.c | 513 + .../Source/Generated/GB_AxB__land_ge_uint16.c | 513 + .../Source/Generated/GB_AxB__land_ge_uint32.c | 513 + .../Source/Generated/GB_AxB__land_ge_uint64.c | 513 + .../Source/Generated/GB_AxB__land_ge_uint8.c | 513 + .../Source/Generated/GB_AxB__land_gt_bool.c | 513 + .../Source/Generated/GB_AxB__land_gt_fp32.c | 513 + .../Source/Generated/GB_AxB__land_gt_fp64.c | 513 + .../Source/Generated/GB_AxB__land_gt_int16.c | 513 + .../Source/Generated/GB_AxB__land_gt_int32.c | 513 + .../Source/Generated/GB_AxB__land_gt_int64.c | 513 + .../Source/Generated/GB_AxB__land_gt_int8.c | 513 + .../Source/Generated/GB_AxB__land_gt_uint16.c | 513 + .../Source/Generated/GB_AxB__land_gt_uint32.c | 513 + .../Source/Generated/GB_AxB__land_gt_uint64.c | 513 + .../Source/Generated/GB_AxB__land_gt_uint8.c | 513 + .../Source/Generated/GB_AxB__land_land_bool.c | 513 + .../Source/Generated/GB_AxB__land_le_bool.c | 513 + .../Source/Generated/GB_AxB__land_le_fp32.c | 513 + .../Source/Generated/GB_AxB__land_le_fp64.c | 513 + .../Source/Generated/GB_AxB__land_le_int16.c | 513 + .../Source/Generated/GB_AxB__land_le_int32.c | 513 + .../Source/Generated/GB_AxB__land_le_int64.c | 513 + .../Source/Generated/GB_AxB__land_le_int8.c | 513 + .../Source/Generated/GB_AxB__land_le_uint16.c | 513 + .../Source/Generated/GB_AxB__land_le_uint32.c | 513 + .../Source/Generated/GB_AxB__land_le_uint64.c | 513 + .../Source/Generated/GB_AxB__land_le_uint8.c | 513 + .../Source/Generated/GB_AxB__land_lor_bool.c | 513 + .../Source/Generated/GB_AxB__land_lt_bool.c | 513 + .../Source/Generated/GB_AxB__land_lt_fp32.c | 513 + .../Source/Generated/GB_AxB__land_lt_fp64.c | 513 + .../Source/Generated/GB_AxB__land_lt_int16.c | 513 + .../Source/Generated/GB_AxB__land_lt_int32.c | 513 + .../Source/Generated/GB_AxB__land_lt_int64.c | 513 + .../Source/Generated/GB_AxB__land_lt_int8.c | 513 + .../Source/Generated/GB_AxB__land_lt_uint16.c | 513 + .../Source/Generated/GB_AxB__land_lt_uint32.c | 513 + .../Source/Generated/GB_AxB__land_lt_uint64.c | 513 + .../Source/Generated/GB_AxB__land_lt_uint8.c | 513 + .../Source/Generated/GB_AxB__land_lxor_bool.c | 513 + .../Source/Generated/GB_AxB__land_ne_fp32.c | 513 + .../Source/Generated/GB_AxB__land_ne_fp64.c | 513 + .../Source/Generated/GB_AxB__land_ne_int16.c | 513 + .../Source/Generated/GB_AxB__land_ne_int32.c | 513 + .../Source/Generated/GB_AxB__land_ne_int64.c | 513 + .../Source/Generated/GB_AxB__land_ne_int8.c | 513 + .../Source/Generated/GB_AxB__land_ne_uint16.c | 513 + .../Source/Generated/GB_AxB__land_ne_uint32.c | 513 + .../Source/Generated/GB_AxB__land_ne_uint64.c | 513 + .../Source/Generated/GB_AxB__land_ne_uint8.c | 513 + 
.../Generated/GB_AxB__land_second_bool.c | 513 + .../Source/Generated/GB_AxB__lor_eq_bool.c | 513 + .../Source/Generated/GB_AxB__lor_eq_fp32.c | 513 + .../Source/Generated/GB_AxB__lor_eq_fp64.c | 513 + .../Source/Generated/GB_AxB__lor_eq_int16.c | 513 + .../Source/Generated/GB_AxB__lor_eq_int32.c | 513 + .../Source/Generated/GB_AxB__lor_eq_int64.c | 513 + .../Source/Generated/GB_AxB__lor_eq_int8.c | 513 + .../Source/Generated/GB_AxB__lor_eq_uint16.c | 513 + .../Source/Generated/GB_AxB__lor_eq_uint32.c | 513 + .../Source/Generated/GB_AxB__lor_eq_uint64.c | 513 + .../Source/Generated/GB_AxB__lor_eq_uint8.c | 513 + .../Source/Generated/GB_AxB__lor_first_bool.c | 513 + .../Source/Generated/GB_AxB__lor_ge_bool.c | 513 + .../Source/Generated/GB_AxB__lor_ge_fp32.c | 513 + .../Source/Generated/GB_AxB__lor_ge_fp64.c | 513 + .../Source/Generated/GB_AxB__lor_ge_int16.c | 513 + .../Source/Generated/GB_AxB__lor_ge_int32.c | 513 + .../Source/Generated/GB_AxB__lor_ge_int64.c | 513 + .../Source/Generated/GB_AxB__lor_ge_int8.c | 513 + .../Source/Generated/GB_AxB__lor_ge_uint16.c | 513 + .../Source/Generated/GB_AxB__lor_ge_uint32.c | 513 + .../Source/Generated/GB_AxB__lor_ge_uint64.c | 513 + .../Source/Generated/GB_AxB__lor_ge_uint8.c | 513 + .../Source/Generated/GB_AxB__lor_gt_bool.c | 513 + .../Source/Generated/GB_AxB__lor_gt_fp32.c | 513 + .../Source/Generated/GB_AxB__lor_gt_fp64.c | 513 + .../Source/Generated/GB_AxB__lor_gt_int16.c | 513 + .../Source/Generated/GB_AxB__lor_gt_int32.c | 513 + .../Source/Generated/GB_AxB__lor_gt_int64.c | 513 + .../Source/Generated/GB_AxB__lor_gt_int8.c | 513 + .../Source/Generated/GB_AxB__lor_gt_uint16.c | 513 + .../Source/Generated/GB_AxB__lor_gt_uint32.c | 513 + .../Source/Generated/GB_AxB__lor_gt_uint64.c | 513 + .../Source/Generated/GB_AxB__lor_gt_uint8.c | 513 + .../Source/Generated/GB_AxB__lor_land_bool.c | 513 + .../Source/Generated/GB_AxB__lor_le_bool.c | 513 + .../Source/Generated/GB_AxB__lor_le_fp32.c | 513 + .../Source/Generated/GB_AxB__lor_le_fp64.c | 513 + .../Source/Generated/GB_AxB__lor_le_int16.c | 513 + .../Source/Generated/GB_AxB__lor_le_int32.c | 513 + .../Source/Generated/GB_AxB__lor_le_int64.c | 513 + .../Source/Generated/GB_AxB__lor_le_int8.c | 513 + .../Source/Generated/GB_AxB__lor_le_uint16.c | 513 + .../Source/Generated/GB_AxB__lor_le_uint32.c | 513 + .../Source/Generated/GB_AxB__lor_le_uint64.c | 513 + .../Source/Generated/GB_AxB__lor_le_uint8.c | 513 + .../Source/Generated/GB_AxB__lor_lor_bool.c | 513 + .../Source/Generated/GB_AxB__lor_lt_bool.c | 513 + .../Source/Generated/GB_AxB__lor_lt_fp32.c | 513 + .../Source/Generated/GB_AxB__lor_lt_fp64.c | 513 + .../Source/Generated/GB_AxB__lor_lt_int16.c | 513 + .../Source/Generated/GB_AxB__lor_lt_int32.c | 513 + .../Source/Generated/GB_AxB__lor_lt_int64.c | 513 + .../Source/Generated/GB_AxB__lor_lt_int8.c | 513 + .../Source/Generated/GB_AxB__lor_lt_uint16.c | 513 + .../Source/Generated/GB_AxB__lor_lt_uint32.c | 513 + .../Source/Generated/GB_AxB__lor_lt_uint64.c | 513 + .../Source/Generated/GB_AxB__lor_lt_uint8.c | 513 + .../Source/Generated/GB_AxB__lor_lxor_bool.c | 513 + .../Source/Generated/GB_AxB__lor_ne_fp32.c | 513 + .../Source/Generated/GB_AxB__lor_ne_fp64.c | 513 + .../Source/Generated/GB_AxB__lor_ne_int16.c | 513 + .../Source/Generated/GB_AxB__lor_ne_int32.c | 513 + .../Source/Generated/GB_AxB__lor_ne_int64.c | 513 + .../Source/Generated/GB_AxB__lor_ne_int8.c | 513 + .../Source/Generated/GB_AxB__lor_ne_uint16.c | 513 + .../Source/Generated/GB_AxB__lor_ne_uint32.c | 513 + 
.../Source/Generated/GB_AxB__lor_ne_uint64.c | 513 + .../Source/Generated/GB_AxB__lor_ne_uint8.c | 513 + .../Generated/GB_AxB__lor_second_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_fp32.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_fp64.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_int16.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_int32.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_int64.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_int8.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_uint16.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_uint32.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_uint64.c | 513 + .../Source/Generated/GB_AxB__lxor_eq_uint8.c | 513 + .../Generated/GB_AxB__lxor_first_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_fp32.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_fp64.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_int16.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_int32.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_int64.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_int8.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_uint16.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_uint32.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_uint64.c | 513 + .../Source/Generated/GB_AxB__lxor_ge_uint8.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_fp32.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_fp64.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_int16.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_int32.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_int64.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_int8.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_uint16.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_uint32.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_uint64.c | 513 + .../Source/Generated/GB_AxB__lxor_gt_uint8.c | 513 + .../Source/Generated/GB_AxB__lxor_land_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_le_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_le_fp32.c | 513 + .../Source/Generated/GB_AxB__lxor_le_fp64.c | 513 + .../Source/Generated/GB_AxB__lxor_le_int16.c | 513 + .../Source/Generated/GB_AxB__lxor_le_int32.c | 513 + .../Source/Generated/GB_AxB__lxor_le_int64.c | 513 + .../Source/Generated/GB_AxB__lxor_le_int8.c | 513 + .../Source/Generated/GB_AxB__lxor_le_uint16.c | 513 + .../Source/Generated/GB_AxB__lxor_le_uint32.c | 513 + .../Source/Generated/GB_AxB__lxor_le_uint64.c | 513 + .../Source/Generated/GB_AxB__lxor_le_uint8.c | 513 + .../Source/Generated/GB_AxB__lxor_lor_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_fp32.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_fp64.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_int16.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_int32.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_int64.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_int8.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_uint16.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_uint32.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_uint64.c | 513 + .../Source/Generated/GB_AxB__lxor_lt_uint8.c | 513 + .../Source/Generated/GB_AxB__lxor_lxor_bool.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_fp32.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_fp64.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_int16.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_int32.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_int64.c | 513 + 
.../Source/Generated/GB_AxB__lxor_ne_int8.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_uint16.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_uint32.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_uint64.c | 513 + .../Source/Generated/GB_AxB__lxor_ne_uint8.c | 513 + .../Generated/GB_AxB__lxor_second_bool.c | 513 + .../Source/Generated/GB_AxB__max_div_fp32.c | 513 + .../Source/Generated/GB_AxB__max_div_fp64.c | 513 + .../Source/Generated/GB_AxB__max_div_int16.c | 513 + .../Source/Generated/GB_AxB__max_div_int32.c | 513 + .../Source/Generated/GB_AxB__max_div_int64.c | 513 + .../Source/Generated/GB_AxB__max_div_int8.c | 513 + .../Source/Generated/GB_AxB__max_div_uint16.c | 513 + .../Source/Generated/GB_AxB__max_div_uint32.c | 513 + .../Source/Generated/GB_AxB__max_div_uint64.c | 513 + .../Source/Generated/GB_AxB__max_div_uint8.c | 513 + .../Source/Generated/GB_AxB__max_first_fp32.c | 513 + .../Source/Generated/GB_AxB__max_first_fp64.c | 513 + .../Generated/GB_AxB__max_first_int16.c | 513 + .../Generated/GB_AxB__max_first_int32.c | 513 + .../Generated/GB_AxB__max_first_int64.c | 513 + .../Source/Generated/GB_AxB__max_first_int8.c | 513 + .../Generated/GB_AxB__max_first_uint16.c | 513 + .../Generated/GB_AxB__max_first_uint32.c | 513 + .../Generated/GB_AxB__max_first_uint64.c | 513 + .../Generated/GB_AxB__max_first_uint8.c | 513 + .../Source/Generated/GB_AxB__max_iseq_fp32.c | 513 + .../Source/Generated/GB_AxB__max_iseq_fp64.c | 513 + .../Source/Generated/GB_AxB__max_iseq_int16.c | 513 + .../Source/Generated/GB_AxB__max_iseq_int32.c | 513 + .../Source/Generated/GB_AxB__max_iseq_int64.c | 513 + .../Source/Generated/GB_AxB__max_iseq_int8.c | 513 + .../Generated/GB_AxB__max_iseq_uint16.c | 513 + .../Generated/GB_AxB__max_iseq_uint32.c | 513 + .../Generated/GB_AxB__max_iseq_uint64.c | 513 + .../Source/Generated/GB_AxB__max_iseq_uint8.c | 513 + .../Source/Generated/GB_AxB__max_isge_fp32.c | 513 + .../Source/Generated/GB_AxB__max_isge_fp64.c | 513 + .../Source/Generated/GB_AxB__max_isge_int16.c | 513 + .../Source/Generated/GB_AxB__max_isge_int32.c | 513 + .../Source/Generated/GB_AxB__max_isge_int64.c | 513 + .../Source/Generated/GB_AxB__max_isge_int8.c | 513 + .../Generated/GB_AxB__max_isge_uint16.c | 513 + .../Generated/GB_AxB__max_isge_uint32.c | 513 + .../Generated/GB_AxB__max_isge_uint64.c | 513 + .../Source/Generated/GB_AxB__max_isge_uint8.c | 513 + .../Source/Generated/GB_AxB__max_isgt_fp32.c | 513 + .../Source/Generated/GB_AxB__max_isgt_fp64.c | 513 + .../Source/Generated/GB_AxB__max_isgt_int16.c | 513 + .../Source/Generated/GB_AxB__max_isgt_int32.c | 513 + .../Source/Generated/GB_AxB__max_isgt_int64.c | 513 + .../Source/Generated/GB_AxB__max_isgt_int8.c | 513 + .../Generated/GB_AxB__max_isgt_uint16.c | 513 + .../Generated/GB_AxB__max_isgt_uint32.c | 513 + .../Generated/GB_AxB__max_isgt_uint64.c | 513 + .../Source/Generated/GB_AxB__max_isgt_uint8.c | 513 + .../Source/Generated/GB_AxB__max_isle_fp32.c | 513 + .../Source/Generated/GB_AxB__max_isle_fp64.c | 513 + .../Source/Generated/GB_AxB__max_isle_int16.c | 513 + .../Source/Generated/GB_AxB__max_isle_int32.c | 513 + .../Source/Generated/GB_AxB__max_isle_int64.c | 513 + .../Source/Generated/GB_AxB__max_isle_int8.c | 513 + .../Generated/GB_AxB__max_isle_uint16.c | 513 + .../Generated/GB_AxB__max_isle_uint32.c | 513 + .../Generated/GB_AxB__max_isle_uint64.c | 513 + .../Source/Generated/GB_AxB__max_isle_uint8.c | 513 + .../Source/Generated/GB_AxB__max_islt_fp32.c | 513 + .../Source/Generated/GB_AxB__max_islt_fp64.c | 513 + 
.../Source/Generated/GB_AxB__max_islt_int16.c | 513 + .../Source/Generated/GB_AxB__max_islt_int32.c | 513 + .../Source/Generated/GB_AxB__max_islt_int64.c | 513 + .../Source/Generated/GB_AxB__max_islt_int8.c | 513 + .../Generated/GB_AxB__max_islt_uint16.c | 513 + .../Generated/GB_AxB__max_islt_uint32.c | 513 + .../Generated/GB_AxB__max_islt_uint64.c | 513 + .../Source/Generated/GB_AxB__max_islt_uint8.c | 513 + .../Source/Generated/GB_AxB__max_isne_fp32.c | 513 + .../Source/Generated/GB_AxB__max_isne_fp64.c | 513 + .../Source/Generated/GB_AxB__max_isne_int16.c | 513 + .../Source/Generated/GB_AxB__max_isne_int32.c | 513 + .../Source/Generated/GB_AxB__max_isne_int64.c | 513 + .../Source/Generated/GB_AxB__max_isne_int8.c | 513 + .../Generated/GB_AxB__max_isne_uint16.c | 513 + .../Generated/GB_AxB__max_isne_uint32.c | 513 + .../Generated/GB_AxB__max_isne_uint64.c | 513 + .../Source/Generated/GB_AxB__max_isne_uint8.c | 513 + .../Source/Generated/GB_AxB__max_land_fp32.c | 513 + .../Source/Generated/GB_AxB__max_land_fp64.c | 513 + .../Source/Generated/GB_AxB__max_land_int16.c | 513 + .../Source/Generated/GB_AxB__max_land_int32.c | 513 + .../Source/Generated/GB_AxB__max_land_int64.c | 513 + .../Source/Generated/GB_AxB__max_land_int8.c | 513 + .../Generated/GB_AxB__max_land_uint16.c | 513 + .../Generated/GB_AxB__max_land_uint32.c | 513 + .../Generated/GB_AxB__max_land_uint64.c | 513 + .../Source/Generated/GB_AxB__max_land_uint8.c | 513 + .../Source/Generated/GB_AxB__max_lor_fp32.c | 513 + .../Source/Generated/GB_AxB__max_lor_fp64.c | 513 + .../Source/Generated/GB_AxB__max_lor_int16.c | 513 + .../Source/Generated/GB_AxB__max_lor_int32.c | 513 + .../Source/Generated/GB_AxB__max_lor_int64.c | 513 + .../Source/Generated/GB_AxB__max_lor_int8.c | 513 + .../Source/Generated/GB_AxB__max_lor_uint16.c | 513 + .../Source/Generated/GB_AxB__max_lor_uint32.c | 513 + .../Source/Generated/GB_AxB__max_lor_uint64.c | 513 + .../Source/Generated/GB_AxB__max_lor_uint8.c | 513 + .../Source/Generated/GB_AxB__max_lxor_fp32.c | 513 + .../Source/Generated/GB_AxB__max_lxor_fp64.c | 513 + .../Source/Generated/GB_AxB__max_lxor_int16.c | 513 + .../Source/Generated/GB_AxB__max_lxor_int32.c | 513 + .../Source/Generated/GB_AxB__max_lxor_int64.c | 513 + .../Source/Generated/GB_AxB__max_lxor_int8.c | 513 + .../Generated/GB_AxB__max_lxor_uint16.c | 513 + .../Generated/GB_AxB__max_lxor_uint32.c | 513 + .../Generated/GB_AxB__max_lxor_uint64.c | 513 + .../Source/Generated/GB_AxB__max_lxor_uint8.c | 513 + .../Source/Generated/GB_AxB__max_max_fp32.c | 513 + .../Source/Generated/GB_AxB__max_max_fp64.c | 513 + .../Source/Generated/GB_AxB__max_max_int16.c | 513 + .../Source/Generated/GB_AxB__max_max_int32.c | 513 + .../Source/Generated/GB_AxB__max_max_int64.c | 513 + .../Source/Generated/GB_AxB__max_max_int8.c | 513 + .../Source/Generated/GB_AxB__max_max_uint16.c | 513 + .../Source/Generated/GB_AxB__max_max_uint32.c | 513 + .../Source/Generated/GB_AxB__max_max_uint64.c | 513 + .../Source/Generated/GB_AxB__max_max_uint8.c | 513 + .../Source/Generated/GB_AxB__max_min_fp32.c | 513 + .../Source/Generated/GB_AxB__max_min_fp64.c | 513 + .../Source/Generated/GB_AxB__max_min_int16.c | 513 + .../Source/Generated/GB_AxB__max_min_int32.c | 513 + .../Source/Generated/GB_AxB__max_min_int64.c | 513 + .../Source/Generated/GB_AxB__max_min_int8.c | 513 + .../Source/Generated/GB_AxB__max_min_uint16.c | 513 + .../Source/Generated/GB_AxB__max_min_uint32.c | 513 + .../Source/Generated/GB_AxB__max_min_uint64.c | 513 + .../Source/Generated/GB_AxB__max_min_uint8.c | 
513 + .../Source/Generated/GB_AxB__max_minus_fp32.c | 513 + .../Source/Generated/GB_AxB__max_minus_fp64.c | 513 + .../Generated/GB_AxB__max_minus_int16.c | 513 + .../Generated/GB_AxB__max_minus_int32.c | 513 + .../Generated/GB_AxB__max_minus_int64.c | 513 + .../Source/Generated/GB_AxB__max_minus_int8.c | 513 + .../Generated/GB_AxB__max_minus_uint16.c | 513 + .../Generated/GB_AxB__max_minus_uint32.c | 513 + .../Generated/GB_AxB__max_minus_uint64.c | 513 + .../Generated/GB_AxB__max_minus_uint8.c | 513 + .../Source/Generated/GB_AxB__max_plus_fp32.c | 513 + .../Source/Generated/GB_AxB__max_plus_fp64.c | 513 + .../Source/Generated/GB_AxB__max_plus_int16.c | 513 + .../Source/Generated/GB_AxB__max_plus_int32.c | 513 + .../Source/Generated/GB_AxB__max_plus_int64.c | 513 + .../Source/Generated/GB_AxB__max_plus_int8.c | 513 + .../Generated/GB_AxB__max_plus_uint16.c | 513 + .../Generated/GB_AxB__max_plus_uint32.c | 513 + .../Generated/GB_AxB__max_plus_uint64.c | 513 + .../Source/Generated/GB_AxB__max_plus_uint8.c | 513 + .../Generated/GB_AxB__max_second_fp32.c | 513 + .../Generated/GB_AxB__max_second_fp64.c | 513 + .../Generated/GB_AxB__max_second_int16.c | 513 + .../Generated/GB_AxB__max_second_int32.c | 513 + .../Generated/GB_AxB__max_second_int64.c | 513 + .../Generated/GB_AxB__max_second_int8.c | 513 + .../Generated/GB_AxB__max_second_uint16.c | 513 + .../Generated/GB_AxB__max_second_uint32.c | 513 + .../Generated/GB_AxB__max_second_uint64.c | 513 + .../Generated/GB_AxB__max_second_uint8.c | 513 + .../Source/Generated/GB_AxB__max_times_fp32.c | 513 + .../Source/Generated/GB_AxB__max_times_fp64.c | 513 + .../Generated/GB_AxB__max_times_int16.c | 513 + .../Generated/GB_AxB__max_times_int32.c | 513 + .../Generated/GB_AxB__max_times_int64.c | 513 + .../Source/Generated/GB_AxB__max_times_int8.c | 513 + .../Generated/GB_AxB__max_times_uint16.c | 513 + .../Generated/GB_AxB__max_times_uint32.c | 513 + .../Generated/GB_AxB__max_times_uint64.c | 513 + .../Generated/GB_AxB__max_times_uint8.c | 513 + .../Source/Generated/GB_AxB__min_div_fp32.c | 513 + .../Source/Generated/GB_AxB__min_div_fp64.c | 513 + .../Source/Generated/GB_AxB__min_div_int16.c | 513 + .../Source/Generated/GB_AxB__min_div_int32.c | 513 + .../Source/Generated/GB_AxB__min_div_int64.c | 513 + .../Source/Generated/GB_AxB__min_div_int8.c | 513 + .../Source/Generated/GB_AxB__min_div_uint16.c | 513 + .../Source/Generated/GB_AxB__min_div_uint32.c | 513 + .../Source/Generated/GB_AxB__min_div_uint64.c | 513 + .../Source/Generated/GB_AxB__min_div_uint8.c | 513 + .../Source/Generated/GB_AxB__min_first_fp32.c | 513 + .../Source/Generated/GB_AxB__min_first_fp64.c | 513 + .../Generated/GB_AxB__min_first_int16.c | 513 + .../Generated/GB_AxB__min_first_int32.c | 513 + .../Generated/GB_AxB__min_first_int64.c | 513 + .../Source/Generated/GB_AxB__min_first_int8.c | 513 + .../Generated/GB_AxB__min_first_uint16.c | 513 + .../Generated/GB_AxB__min_first_uint32.c | 513 + .../Generated/GB_AxB__min_first_uint64.c | 513 + .../Generated/GB_AxB__min_first_uint8.c | 513 + .../Source/Generated/GB_AxB__min_iseq_fp32.c | 513 + .../Source/Generated/GB_AxB__min_iseq_fp64.c | 513 + .../Source/Generated/GB_AxB__min_iseq_int16.c | 513 + .../Source/Generated/GB_AxB__min_iseq_int32.c | 513 + .../Source/Generated/GB_AxB__min_iseq_int64.c | 513 + .../Source/Generated/GB_AxB__min_iseq_int8.c | 513 + .../Generated/GB_AxB__min_iseq_uint16.c | 513 + .../Generated/GB_AxB__min_iseq_uint32.c | 513 + .../Generated/GB_AxB__min_iseq_uint64.c | 513 + 
.../Source/Generated/GB_AxB__min_iseq_uint8.c | 513 + .../Source/Generated/GB_AxB__min_isge_fp32.c | 513 + .../Source/Generated/GB_AxB__min_isge_fp64.c | 513 + .../Source/Generated/GB_AxB__min_isge_int16.c | 513 + .../Source/Generated/GB_AxB__min_isge_int32.c | 513 + .../Source/Generated/GB_AxB__min_isge_int64.c | 513 + .../Source/Generated/GB_AxB__min_isge_int8.c | 513 + .../Generated/GB_AxB__min_isge_uint16.c | 513 + .../Generated/GB_AxB__min_isge_uint32.c | 513 + .../Generated/GB_AxB__min_isge_uint64.c | 513 + .../Source/Generated/GB_AxB__min_isge_uint8.c | 513 + .../Source/Generated/GB_AxB__min_isgt_fp32.c | 513 + .../Source/Generated/GB_AxB__min_isgt_fp64.c | 513 + .../Source/Generated/GB_AxB__min_isgt_int16.c | 513 + .../Source/Generated/GB_AxB__min_isgt_int32.c | 513 + .../Source/Generated/GB_AxB__min_isgt_int64.c | 513 + .../Source/Generated/GB_AxB__min_isgt_int8.c | 513 + .../Generated/GB_AxB__min_isgt_uint16.c | 513 + .../Generated/GB_AxB__min_isgt_uint32.c | 513 + .../Generated/GB_AxB__min_isgt_uint64.c | 513 + .../Source/Generated/GB_AxB__min_isgt_uint8.c | 513 + .../Source/Generated/GB_AxB__min_isle_fp32.c | 513 + .../Source/Generated/GB_AxB__min_isle_fp64.c | 513 + .../Source/Generated/GB_AxB__min_isle_int16.c | 513 + .../Source/Generated/GB_AxB__min_isle_int32.c | 513 + .../Source/Generated/GB_AxB__min_isle_int64.c | 513 + .../Source/Generated/GB_AxB__min_isle_int8.c | 513 + .../Generated/GB_AxB__min_isle_uint16.c | 513 + .../Generated/GB_AxB__min_isle_uint32.c | 513 + .../Generated/GB_AxB__min_isle_uint64.c | 513 + .../Source/Generated/GB_AxB__min_isle_uint8.c | 513 + .../Source/Generated/GB_AxB__min_islt_fp32.c | 513 + .../Source/Generated/GB_AxB__min_islt_fp64.c | 513 + .../Source/Generated/GB_AxB__min_islt_int16.c | 513 + .../Source/Generated/GB_AxB__min_islt_int32.c | 513 + .../Source/Generated/GB_AxB__min_islt_int64.c | 513 + .../Source/Generated/GB_AxB__min_islt_int8.c | 513 + .../Generated/GB_AxB__min_islt_uint16.c | 513 + .../Generated/GB_AxB__min_islt_uint32.c | 513 + .../Generated/GB_AxB__min_islt_uint64.c | 513 + .../Source/Generated/GB_AxB__min_islt_uint8.c | 513 + .../Source/Generated/GB_AxB__min_isne_fp32.c | 513 + .../Source/Generated/GB_AxB__min_isne_fp64.c | 513 + .../Source/Generated/GB_AxB__min_isne_int16.c | 513 + .../Source/Generated/GB_AxB__min_isne_int32.c | 513 + .../Source/Generated/GB_AxB__min_isne_int64.c | 513 + .../Source/Generated/GB_AxB__min_isne_int8.c | 513 + .../Generated/GB_AxB__min_isne_uint16.c | 513 + .../Generated/GB_AxB__min_isne_uint32.c | 513 + .../Generated/GB_AxB__min_isne_uint64.c | 513 + .../Source/Generated/GB_AxB__min_isne_uint8.c | 513 + .../Source/Generated/GB_AxB__min_land_fp32.c | 513 + .../Source/Generated/GB_AxB__min_land_fp64.c | 513 + .../Source/Generated/GB_AxB__min_land_int16.c | 513 + .../Source/Generated/GB_AxB__min_land_int32.c | 513 + .../Source/Generated/GB_AxB__min_land_int64.c | 513 + .../Source/Generated/GB_AxB__min_land_int8.c | 513 + .../Generated/GB_AxB__min_land_uint16.c | 513 + .../Generated/GB_AxB__min_land_uint32.c | 513 + .../Generated/GB_AxB__min_land_uint64.c | 513 + .../Source/Generated/GB_AxB__min_land_uint8.c | 513 + .../Source/Generated/GB_AxB__min_lor_fp32.c | 513 + .../Source/Generated/GB_AxB__min_lor_fp64.c | 513 + .../Source/Generated/GB_AxB__min_lor_int16.c | 513 + .../Source/Generated/GB_AxB__min_lor_int32.c | 513 + .../Source/Generated/GB_AxB__min_lor_int64.c | 513 + .../Source/Generated/GB_AxB__min_lor_int8.c | 513 + .../Source/Generated/GB_AxB__min_lor_uint16.c | 513 + 
.../Source/Generated/GB_AxB__min_lor_uint32.c | 513 + .../Source/Generated/GB_AxB__min_lor_uint64.c | 513 + .../Source/Generated/GB_AxB__min_lor_uint8.c | 513 + .../Source/Generated/GB_AxB__min_lxor_fp32.c | 513 + .../Source/Generated/GB_AxB__min_lxor_fp64.c | 513 + .../Source/Generated/GB_AxB__min_lxor_int16.c | 513 + .../Source/Generated/GB_AxB__min_lxor_int32.c | 513 + .../Source/Generated/GB_AxB__min_lxor_int64.c | 513 + .../Source/Generated/GB_AxB__min_lxor_int8.c | 513 + .../Generated/GB_AxB__min_lxor_uint16.c | 513 + .../Generated/GB_AxB__min_lxor_uint32.c | 513 + .../Generated/GB_AxB__min_lxor_uint64.c | 513 + .../Source/Generated/GB_AxB__min_lxor_uint8.c | 513 + .../Source/Generated/GB_AxB__min_max_fp32.c | 513 + .../Source/Generated/GB_AxB__min_max_fp64.c | 513 + .../Source/Generated/GB_AxB__min_max_int16.c | 513 + .../Source/Generated/GB_AxB__min_max_int32.c | 513 + .../Source/Generated/GB_AxB__min_max_int64.c | 513 + .../Source/Generated/GB_AxB__min_max_int8.c | 513 + .../Source/Generated/GB_AxB__min_max_uint16.c | 513 + .../Source/Generated/GB_AxB__min_max_uint32.c | 513 + .../Source/Generated/GB_AxB__min_max_uint64.c | 513 + .../Source/Generated/GB_AxB__min_max_uint8.c | 513 + .../Source/Generated/GB_AxB__min_min_fp32.c | 513 + .../Source/Generated/GB_AxB__min_min_fp64.c | 513 + .../Source/Generated/GB_AxB__min_min_int16.c | 513 + .../Source/Generated/GB_AxB__min_min_int32.c | 513 + .../Source/Generated/GB_AxB__min_min_int64.c | 513 + .../Source/Generated/GB_AxB__min_min_int8.c | 513 + .../Source/Generated/GB_AxB__min_min_uint16.c | 513 + .../Source/Generated/GB_AxB__min_min_uint32.c | 513 + .../Source/Generated/GB_AxB__min_min_uint64.c | 513 + .../Source/Generated/GB_AxB__min_min_uint8.c | 513 + .../Source/Generated/GB_AxB__min_minus_fp32.c | 513 + .../Source/Generated/GB_AxB__min_minus_fp64.c | 513 + .../Generated/GB_AxB__min_minus_int16.c | 513 + .../Generated/GB_AxB__min_minus_int32.c | 513 + .../Generated/GB_AxB__min_minus_int64.c | 513 + .../Source/Generated/GB_AxB__min_minus_int8.c | 513 + .../Generated/GB_AxB__min_minus_uint16.c | 513 + .../Generated/GB_AxB__min_minus_uint32.c | 513 + .../Generated/GB_AxB__min_minus_uint64.c | 513 + .../Generated/GB_AxB__min_minus_uint8.c | 513 + .../Source/Generated/GB_AxB__min_plus_fp32.c | 513 + .../Source/Generated/GB_AxB__min_plus_fp64.c | 513 + .../Source/Generated/GB_AxB__min_plus_int16.c | 513 + .../Source/Generated/GB_AxB__min_plus_int32.c | 513 + .../Source/Generated/GB_AxB__min_plus_int64.c | 513 + .../Source/Generated/GB_AxB__min_plus_int8.c | 513 + .../Generated/GB_AxB__min_plus_uint16.c | 513 + .../Generated/GB_AxB__min_plus_uint32.c | 513 + .../Generated/GB_AxB__min_plus_uint64.c | 513 + .../Source/Generated/GB_AxB__min_plus_uint8.c | 513 + .../Generated/GB_AxB__min_second_fp32.c | 513 + .../Generated/GB_AxB__min_second_fp64.c | 513 + .../Generated/GB_AxB__min_second_int16.c | 513 + .../Generated/GB_AxB__min_second_int32.c | 513 + .../Generated/GB_AxB__min_second_int64.c | 513 + .../Generated/GB_AxB__min_second_int8.c | 513 + .../Generated/GB_AxB__min_second_uint16.c | 513 + .../Generated/GB_AxB__min_second_uint32.c | 513 + .../Generated/GB_AxB__min_second_uint64.c | 513 + .../Generated/GB_AxB__min_second_uint8.c | 513 + .../Source/Generated/GB_AxB__min_times_fp32.c | 513 + .../Source/Generated/GB_AxB__min_times_fp64.c | 513 + .../Generated/GB_AxB__min_times_int16.c | 513 + .../Generated/GB_AxB__min_times_int32.c | 513 + .../Generated/GB_AxB__min_times_int64.c | 513 + .../Source/Generated/GB_AxB__min_times_int8.c | 513 
+ .../Generated/GB_AxB__min_times_uint16.c | 513 + .../Generated/GB_AxB__min_times_uint32.c | 513 + .../Generated/GB_AxB__min_times_uint64.c | 513 + .../Generated/GB_AxB__min_times_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_div_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_div_fp64.c | 513 + .../Source/Generated/GB_AxB__plus_div_int16.c | 513 + .../Source/Generated/GB_AxB__plus_div_int32.c | 513 + .../Source/Generated/GB_AxB__plus_div_int64.c | 513 + .../Source/Generated/GB_AxB__plus_div_int8.c | 513 + .../Generated/GB_AxB__plus_div_uint16.c | 513 + .../Generated/GB_AxB__plus_div_uint32.c | 513 + .../Generated/GB_AxB__plus_div_uint64.c | 513 + .../Source/Generated/GB_AxB__plus_div_uint8.c | 513 + .../Generated/GB_AxB__plus_first_fp32.c | 513 + .../Generated/GB_AxB__plus_first_fp64.c | 513 + .../Generated/GB_AxB__plus_first_int16.c | 513 + .../Generated/GB_AxB__plus_first_int32.c | 513 + .../Generated/GB_AxB__plus_first_int64.c | 513 + .../Generated/GB_AxB__plus_first_int8.c | 513 + .../Generated/GB_AxB__plus_first_uint16.c | 513 + .../Generated/GB_AxB__plus_first_uint32.c | 513 + .../Generated/GB_AxB__plus_first_uint64.c | 513 + .../Generated/GB_AxB__plus_first_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_iseq_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_iseq_fp64.c | 513 + .../Generated/GB_AxB__plus_iseq_int16.c | 513 + .../Generated/GB_AxB__plus_iseq_int32.c | 513 + .../Generated/GB_AxB__plus_iseq_int64.c | 513 + .../Source/Generated/GB_AxB__plus_iseq_int8.c | 513 + .../Generated/GB_AxB__plus_iseq_uint16.c | 513 + .../Generated/GB_AxB__plus_iseq_uint32.c | 513 + .../Generated/GB_AxB__plus_iseq_uint64.c | 513 + .../Generated/GB_AxB__plus_iseq_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_isge_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_isge_fp64.c | 513 + .../Generated/GB_AxB__plus_isge_int16.c | 513 + .../Generated/GB_AxB__plus_isge_int32.c | 513 + .../Generated/GB_AxB__plus_isge_int64.c | 513 + .../Source/Generated/GB_AxB__plus_isge_int8.c | 513 + .../Generated/GB_AxB__plus_isge_uint16.c | 513 + .../Generated/GB_AxB__plus_isge_uint32.c | 513 + .../Generated/GB_AxB__plus_isge_uint64.c | 513 + .../Generated/GB_AxB__plus_isge_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_isgt_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_isgt_fp64.c | 513 + .../Generated/GB_AxB__plus_isgt_int16.c | 513 + .../Generated/GB_AxB__plus_isgt_int32.c | 513 + .../Generated/GB_AxB__plus_isgt_int64.c | 513 + .../Source/Generated/GB_AxB__plus_isgt_int8.c | 513 + .../Generated/GB_AxB__plus_isgt_uint16.c | 513 + .../Generated/GB_AxB__plus_isgt_uint32.c | 513 + .../Generated/GB_AxB__plus_isgt_uint64.c | 513 + .../Generated/GB_AxB__plus_isgt_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_isle_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_isle_fp64.c | 513 + .../Generated/GB_AxB__plus_isle_int16.c | 513 + .../Generated/GB_AxB__plus_isle_int32.c | 513 + .../Generated/GB_AxB__plus_isle_int64.c | 513 + .../Source/Generated/GB_AxB__plus_isle_int8.c | 513 + .../Generated/GB_AxB__plus_isle_uint16.c | 513 + .../Generated/GB_AxB__plus_isle_uint32.c | 513 + .../Generated/GB_AxB__plus_isle_uint64.c | 513 + .../Generated/GB_AxB__plus_isle_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_islt_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_islt_fp64.c | 513 + .../Generated/GB_AxB__plus_islt_int16.c | 513 + .../Generated/GB_AxB__plus_islt_int32.c | 513 + .../Generated/GB_AxB__plus_islt_int64.c | 513 + .../Source/Generated/GB_AxB__plus_islt_int8.c | 513 + 
.../Generated/GB_AxB__plus_islt_uint16.c | 513 + .../Generated/GB_AxB__plus_islt_uint32.c | 513 + .../Generated/GB_AxB__plus_islt_uint64.c | 513 + .../Generated/GB_AxB__plus_islt_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_isne_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_isne_fp64.c | 513 + .../Generated/GB_AxB__plus_isne_int16.c | 513 + .../Generated/GB_AxB__plus_isne_int32.c | 513 + .../Generated/GB_AxB__plus_isne_int64.c | 513 + .../Source/Generated/GB_AxB__plus_isne_int8.c | 513 + .../Generated/GB_AxB__plus_isne_uint16.c | 513 + .../Generated/GB_AxB__plus_isne_uint32.c | 513 + .../Generated/GB_AxB__plus_isne_uint64.c | 513 + .../Generated/GB_AxB__plus_isne_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_land_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_land_fp64.c | 513 + .../Generated/GB_AxB__plus_land_int16.c | 513 + .../Generated/GB_AxB__plus_land_int32.c | 513 + .../Generated/GB_AxB__plus_land_int64.c | 513 + .../Source/Generated/GB_AxB__plus_land_int8.c | 513 + .../Generated/GB_AxB__plus_land_uint16.c | 513 + .../Generated/GB_AxB__plus_land_uint32.c | 513 + .../Generated/GB_AxB__plus_land_uint64.c | 513 + .../Generated/GB_AxB__plus_land_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_lor_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_lor_fp64.c | 513 + .../Source/Generated/GB_AxB__plus_lor_int16.c | 513 + .../Source/Generated/GB_AxB__plus_lor_int32.c | 513 + .../Source/Generated/GB_AxB__plus_lor_int64.c | 513 + .../Source/Generated/GB_AxB__plus_lor_int8.c | 513 + .../Generated/GB_AxB__plus_lor_uint16.c | 513 + .../Generated/GB_AxB__plus_lor_uint32.c | 513 + .../Generated/GB_AxB__plus_lor_uint64.c | 513 + .../Source/Generated/GB_AxB__plus_lor_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_lxor_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_lxor_fp64.c | 513 + .../Generated/GB_AxB__plus_lxor_int16.c | 513 + .../Generated/GB_AxB__plus_lxor_int32.c | 513 + .../Generated/GB_AxB__plus_lxor_int64.c | 513 + .../Source/Generated/GB_AxB__plus_lxor_int8.c | 513 + .../Generated/GB_AxB__plus_lxor_uint16.c | 513 + .../Generated/GB_AxB__plus_lxor_uint32.c | 513 + .../Generated/GB_AxB__plus_lxor_uint64.c | 513 + .../Generated/GB_AxB__plus_lxor_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_max_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_max_fp64.c | 513 + .../Source/Generated/GB_AxB__plus_max_int16.c | 513 + .../Source/Generated/GB_AxB__plus_max_int32.c | 513 + .../Source/Generated/GB_AxB__plus_max_int64.c | 513 + .../Source/Generated/GB_AxB__plus_max_int8.c | 513 + .../Generated/GB_AxB__plus_max_uint16.c | 513 + .../Generated/GB_AxB__plus_max_uint32.c | 513 + .../Generated/GB_AxB__plus_max_uint64.c | 513 + .../Source/Generated/GB_AxB__plus_max_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_min_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_min_fp64.c | 513 + .../Source/Generated/GB_AxB__plus_min_int16.c | 513 + .../Source/Generated/GB_AxB__plus_min_int32.c | 513 + .../Source/Generated/GB_AxB__plus_min_int64.c | 513 + .../Source/Generated/GB_AxB__plus_min_int8.c | 513 + .../Generated/GB_AxB__plus_min_uint16.c | 513 + .../Generated/GB_AxB__plus_min_uint32.c | 513 + .../Generated/GB_AxB__plus_min_uint64.c | 513 + .../Source/Generated/GB_AxB__plus_min_uint8.c | 513 + .../Generated/GB_AxB__plus_minus_fp32.c | 513 + .../Generated/GB_AxB__plus_minus_fp64.c | 513 + .../Generated/GB_AxB__plus_minus_int16.c | 513 + .../Generated/GB_AxB__plus_minus_int32.c | 513 + .../Generated/GB_AxB__plus_minus_int64.c | 513 + .../Generated/GB_AxB__plus_minus_int8.c | 513 + 
.../Generated/GB_AxB__plus_minus_uint16.c | 513 + .../Generated/GB_AxB__plus_minus_uint32.c | 513 + .../Generated/GB_AxB__plus_minus_uint64.c | 513 + .../Generated/GB_AxB__plus_minus_uint8.c | 513 + .../Source/Generated/GB_AxB__plus_plus_fp32.c | 513 + .../Source/Generated/GB_AxB__plus_plus_fp64.c | 513 + .../Generated/GB_AxB__plus_plus_int16.c | 513 + .../Generated/GB_AxB__plus_plus_int32.c | 513 + .../Generated/GB_AxB__plus_plus_int64.c | 513 + .../Source/Generated/GB_AxB__plus_plus_int8.c | 513 + .../Generated/GB_AxB__plus_plus_uint16.c | 513 + .../Generated/GB_AxB__plus_plus_uint32.c | 513 + .../Generated/GB_AxB__plus_plus_uint64.c | 513 + .../Generated/GB_AxB__plus_plus_uint8.c | 513 + .../Generated/GB_AxB__plus_second_fp32.c | 513 + .../Generated/GB_AxB__plus_second_fp64.c | 513 + .../Generated/GB_AxB__plus_second_int16.c | 513 + .../Generated/GB_AxB__plus_second_int32.c | 513 + .../Generated/GB_AxB__plus_second_int64.c | 513 + .../Generated/GB_AxB__plus_second_int8.c | 513 + .../Generated/GB_AxB__plus_second_uint16.c | 513 + .../Generated/GB_AxB__plus_second_uint32.c | 513 + .../Generated/GB_AxB__plus_second_uint64.c | 513 + .../Generated/GB_AxB__plus_second_uint8.c | 513 + .../Generated/GB_AxB__plus_times_fp32.c | 513 + .../Generated/GB_AxB__plus_times_fp64.c | 513 + .../Generated/GB_AxB__plus_times_int16.c | 513 + .../Generated/GB_AxB__plus_times_int32.c | 513 + .../Generated/GB_AxB__plus_times_int64.c | 513 + .../Generated/GB_AxB__plus_times_int8.c | 513 + .../Generated/GB_AxB__plus_times_uint16.c | 513 + .../Generated/GB_AxB__plus_times_uint32.c | 513 + .../Generated/GB_AxB__plus_times_uint64.c | 513 + .../Generated/GB_AxB__plus_times_uint8.c | 513 + .../Source/Generated/GB_AxB__semirings.h | 17289 ++++++++++++++++ .../Source/Generated/GB_AxB__times_div_fp32.c | 513 + .../Source/Generated/GB_AxB__times_div_fp64.c | 513 + .../Generated/GB_AxB__times_div_int16.c | 513 + .../Generated/GB_AxB__times_div_int32.c | 513 + .../Generated/GB_AxB__times_div_int64.c | 513 + .../Source/Generated/GB_AxB__times_div_int8.c | 513 + .../Generated/GB_AxB__times_div_uint16.c | 513 + .../Generated/GB_AxB__times_div_uint32.c | 513 + .../Generated/GB_AxB__times_div_uint64.c | 513 + .../Generated/GB_AxB__times_div_uint8.c | 513 + .../Generated/GB_AxB__times_first_fp32.c | 513 + .../Generated/GB_AxB__times_first_fp64.c | 513 + .../Generated/GB_AxB__times_first_int16.c | 513 + .../Generated/GB_AxB__times_first_int32.c | 513 + .../Generated/GB_AxB__times_first_int64.c | 513 + .../Generated/GB_AxB__times_first_int8.c | 513 + .../Generated/GB_AxB__times_first_uint16.c | 513 + .../Generated/GB_AxB__times_first_uint32.c | 513 + .../Generated/GB_AxB__times_first_uint64.c | 513 + .../Generated/GB_AxB__times_first_uint8.c | 513 + .../Generated/GB_AxB__times_iseq_fp32.c | 513 + .../Generated/GB_AxB__times_iseq_fp64.c | 513 + .../Generated/GB_AxB__times_iseq_int16.c | 513 + .../Generated/GB_AxB__times_iseq_int32.c | 513 + .../Generated/GB_AxB__times_iseq_int64.c | 513 + .../Generated/GB_AxB__times_iseq_int8.c | 513 + .../Generated/GB_AxB__times_iseq_uint16.c | 513 + .../Generated/GB_AxB__times_iseq_uint32.c | 513 + .../Generated/GB_AxB__times_iseq_uint64.c | 513 + .../Generated/GB_AxB__times_iseq_uint8.c | 513 + .../Generated/GB_AxB__times_isge_fp32.c | 513 + .../Generated/GB_AxB__times_isge_fp64.c | 513 + .../Generated/GB_AxB__times_isge_int16.c | 513 + .../Generated/GB_AxB__times_isge_int32.c | 513 + .../Generated/GB_AxB__times_isge_int64.c | 513 + .../Generated/GB_AxB__times_isge_int8.c | 513 + 
.../Generated/GB_AxB__times_isge_uint16.c | 513 + .../Generated/GB_AxB__times_isge_uint32.c | 513 + .../Generated/GB_AxB__times_isge_uint64.c | 513 + .../Generated/GB_AxB__times_isge_uint8.c | 513 + .../Generated/GB_AxB__times_isgt_fp32.c | 513 + .../Generated/GB_AxB__times_isgt_fp64.c | 513 + .../Generated/GB_AxB__times_isgt_int16.c | 513 + .../Generated/GB_AxB__times_isgt_int32.c | 513 + .../Generated/GB_AxB__times_isgt_int64.c | 513 + .../Generated/GB_AxB__times_isgt_int8.c | 513 + .../Generated/GB_AxB__times_isgt_uint16.c | 513 + .../Generated/GB_AxB__times_isgt_uint32.c | 513 + .../Generated/GB_AxB__times_isgt_uint64.c | 513 + .../Generated/GB_AxB__times_isgt_uint8.c | 513 + .../Generated/GB_AxB__times_isle_fp32.c | 513 + .../Generated/GB_AxB__times_isle_fp64.c | 513 + .../Generated/GB_AxB__times_isle_int16.c | 513 + .../Generated/GB_AxB__times_isle_int32.c | 513 + .../Generated/GB_AxB__times_isle_int64.c | 513 + .../Generated/GB_AxB__times_isle_int8.c | 513 + .../Generated/GB_AxB__times_isle_uint16.c | 513 + .../Generated/GB_AxB__times_isle_uint32.c | 513 + .../Generated/GB_AxB__times_isle_uint64.c | 513 + .../Generated/GB_AxB__times_isle_uint8.c | 513 + .../Generated/GB_AxB__times_islt_fp32.c | 513 + .../Generated/GB_AxB__times_islt_fp64.c | 513 + .../Generated/GB_AxB__times_islt_int16.c | 513 + .../Generated/GB_AxB__times_islt_int32.c | 513 + .../Generated/GB_AxB__times_islt_int64.c | 513 + .../Generated/GB_AxB__times_islt_int8.c | 513 + .../Generated/GB_AxB__times_islt_uint16.c | 513 + .../Generated/GB_AxB__times_islt_uint32.c | 513 + .../Generated/GB_AxB__times_islt_uint64.c | 513 + .../Generated/GB_AxB__times_islt_uint8.c | 513 + .../Generated/GB_AxB__times_isne_fp32.c | 513 + .../Generated/GB_AxB__times_isne_fp64.c | 513 + .../Generated/GB_AxB__times_isne_int16.c | 513 + .../Generated/GB_AxB__times_isne_int32.c | 513 + .../Generated/GB_AxB__times_isne_int64.c | 513 + .../Generated/GB_AxB__times_isne_int8.c | 513 + .../Generated/GB_AxB__times_isne_uint16.c | 513 + .../Generated/GB_AxB__times_isne_uint32.c | 513 + .../Generated/GB_AxB__times_isne_uint64.c | 513 + .../Generated/GB_AxB__times_isne_uint8.c | 513 + .../Generated/GB_AxB__times_land_fp32.c | 513 + .../Generated/GB_AxB__times_land_fp64.c | 513 + .../Generated/GB_AxB__times_land_int16.c | 513 + .../Generated/GB_AxB__times_land_int32.c | 513 + .../Generated/GB_AxB__times_land_int64.c | 513 + .../Generated/GB_AxB__times_land_int8.c | 513 + .../Generated/GB_AxB__times_land_uint16.c | 513 + .../Generated/GB_AxB__times_land_uint32.c | 513 + .../Generated/GB_AxB__times_land_uint64.c | 513 + .../Generated/GB_AxB__times_land_uint8.c | 513 + .../Source/Generated/GB_AxB__times_lor_fp32.c | 513 + .../Source/Generated/GB_AxB__times_lor_fp64.c | 513 + .../Generated/GB_AxB__times_lor_int16.c | 513 + .../Generated/GB_AxB__times_lor_int32.c | 513 + .../Generated/GB_AxB__times_lor_int64.c | 513 + .../Source/Generated/GB_AxB__times_lor_int8.c | 513 + .../Generated/GB_AxB__times_lor_uint16.c | 513 + .../Generated/GB_AxB__times_lor_uint32.c | 513 + .../Generated/GB_AxB__times_lor_uint64.c | 513 + .../Generated/GB_AxB__times_lor_uint8.c | 513 + .../Generated/GB_AxB__times_lxor_fp32.c | 513 + .../Generated/GB_AxB__times_lxor_fp64.c | 513 + .../Generated/GB_AxB__times_lxor_int16.c | 513 + .../Generated/GB_AxB__times_lxor_int32.c | 513 + .../Generated/GB_AxB__times_lxor_int64.c | 513 + .../Generated/GB_AxB__times_lxor_int8.c | 513 + .../Generated/GB_AxB__times_lxor_uint16.c | 513 + .../Generated/GB_AxB__times_lxor_uint32.c | 513 + 
.../Generated/GB_AxB__times_lxor_uint64.c | 513 + .../Generated/GB_AxB__times_lxor_uint8.c | 513 + .../Source/Generated/GB_AxB__times_max_fp32.c | 513 + .../Source/Generated/GB_AxB__times_max_fp64.c | 513 + .../Generated/GB_AxB__times_max_int16.c | 513 + .../Generated/GB_AxB__times_max_int32.c | 513 + .../Generated/GB_AxB__times_max_int64.c | 513 + .../Source/Generated/GB_AxB__times_max_int8.c | 513 + .../Generated/GB_AxB__times_max_uint16.c | 513 + .../Generated/GB_AxB__times_max_uint32.c | 513 + .../Generated/GB_AxB__times_max_uint64.c | 513 + .../Generated/GB_AxB__times_max_uint8.c | 513 + .../Source/Generated/GB_AxB__times_min_fp32.c | 513 + .../Source/Generated/GB_AxB__times_min_fp64.c | 513 + .../Generated/GB_AxB__times_min_int16.c | 513 + .../Generated/GB_AxB__times_min_int32.c | 513 + .../Generated/GB_AxB__times_min_int64.c | 513 + .../Source/Generated/GB_AxB__times_min_int8.c | 513 + .../Generated/GB_AxB__times_min_uint16.c | 513 + .../Generated/GB_AxB__times_min_uint32.c | 513 + .../Generated/GB_AxB__times_min_uint64.c | 513 + .../Generated/GB_AxB__times_min_uint8.c | 513 + .../Generated/GB_AxB__times_minus_fp32.c | 513 + .../Generated/GB_AxB__times_minus_fp64.c | 513 + .../Generated/GB_AxB__times_minus_int16.c | 513 + .../Generated/GB_AxB__times_minus_int32.c | 513 + .../Generated/GB_AxB__times_minus_int64.c | 513 + .../Generated/GB_AxB__times_minus_int8.c | 513 + .../Generated/GB_AxB__times_minus_uint16.c | 513 + .../Generated/GB_AxB__times_minus_uint32.c | 513 + .../Generated/GB_AxB__times_minus_uint64.c | 513 + .../Generated/GB_AxB__times_minus_uint8.c | 513 + .../Generated/GB_AxB__times_plus_fp32.c | 513 + .../Generated/GB_AxB__times_plus_fp64.c | 513 + .../Generated/GB_AxB__times_plus_int16.c | 513 + .../Generated/GB_AxB__times_plus_int32.c | 513 + .../Generated/GB_AxB__times_plus_int64.c | 513 + .../Generated/GB_AxB__times_plus_int8.c | 513 + .../Generated/GB_AxB__times_plus_uint16.c | 513 + .../Generated/GB_AxB__times_plus_uint32.c | 513 + .../Generated/GB_AxB__times_plus_uint64.c | 513 + .../Generated/GB_AxB__times_plus_uint8.c | 513 + .../Generated/GB_AxB__times_second_fp32.c | 513 + .../Generated/GB_AxB__times_second_fp64.c | 513 + .../Generated/GB_AxB__times_second_int16.c | 513 + .../Generated/GB_AxB__times_second_int32.c | 513 + .../Generated/GB_AxB__times_second_int64.c | 513 + .../Generated/GB_AxB__times_second_int8.c | 513 + .../Generated/GB_AxB__times_second_uint16.c | 513 + .../Generated/GB_AxB__times_second_uint32.c | 513 + .../Generated/GB_AxB__times_second_uint64.c | 513 + .../Generated/GB_AxB__times_second_uint8.c | 513 + .../Generated/GB_AxB__times_times_fp32.c | 513 + .../Generated/GB_AxB__times_times_fp64.c | 513 + .../Generated/GB_AxB__times_times_int16.c | 513 + .../Generated/GB_AxB__times_times_int32.c | 513 + .../Generated/GB_AxB__times_times_int64.c | 513 + .../Generated/GB_AxB__times_times_int8.c | 513 + .../Generated/GB_AxB__times_times_uint16.c | 513 + .../Generated/GB_AxB__times_times_uint32.c | 513 + .../Generated/GB_AxB__times_times_uint64.c | 513 + .../Generated/GB_AxB__times_times_uint8.c | 513 + GraphBLAS/Source/GrB_BinaryOp_free.c | 2 +- GraphBLAS/Source/GrB_Descriptor_free.c | 2 +- GraphBLAS/Source/GrB_Matrix_reduce_scalar.c | 1 - GraphBLAS/Source/GrB_Monoid_free.c | 4 +- GraphBLAS/Source/GrB_Semiring_free.c | 2 +- GraphBLAS/Source/GrB_Type_free.c | 2 +- GraphBLAS/Source/GrB_UnaryOp_free.c | 2 +- GraphBLAS/Source/GrB_Vector_reduce_scalar.c | 1 - GraphBLAS/Source/GrB_init.c | 49 +- GraphBLAS/Source/GxB_SelectOp_free.c | 2 +- 
GraphBLAS/Source/GxB_stats.c | 52 + GraphBLAS/Source/README.txt | 5 +- GraphBLAS/Source/Template/GB_AxB.c | 513 + GraphBLAS/Source/Template/GB_AxB.h | 18 + .../Source/Template/GB_AxB_compare_template.c | 128 +- GraphBLAS/Source/Template/GB_AxB_factory.c | 69 +- GraphBLAS/Source/Template/GB_AxB_template.c | 160 +- GraphBLAS/Source/axb.m | 46 + GraphBLAS/Source/axb_compare_template.m | 70 + GraphBLAS/Source/axb_method.m | 64 + GraphBLAS/Source/axb_template.m | 68 + GraphBLAS/Tcov/Makefile | 3 +- GraphBLAS/Tcov/README.txt | 3 +- GraphBLAS/Tcov/gbcover.m | 3 +- GraphBLAS/Tcov/gbcover_start.c | 2 +- GraphBLAS/Tcov/log.txt | 37 - GraphBLAS/Tcov/log_Dec28.txt | 41 + GraphBLAS/Test/Contents.m | 16 +- GraphBLAS/Test/GB_mex.h | 40 +- GraphBLAS/Test/GB_mex_Col_assign.c | 6 +- GraphBLAS/Test/GB_mex_Matrix_extractElement.c | 10 +- GraphBLAS/Test/GB_mex_Vector_extractElement.c | 10 +- GraphBLAS/Test/GB_mex_assign.c | 6 +- GraphBLAS/Test/GB_mex_assign_alias.c | 96 + GraphBLAS/Test/GB_mex_cast.c | 2 +- GraphBLAS/Test/GB_mex_errors.c | 375 +- GraphBLAS/Test/GB_mex_extractTuples.c | 4 +- GraphBLAS/Test/GB_mex_mxm_alias.c | 100 + GraphBLAS/Test/GB_mex_op.c | 23 +- GraphBLAS/Test/GB_mex_reduce_to_scalar.c | 3 +- GraphBLAS/Test/GB_mex_setElement.c | 6 +- GraphBLAS/Test/GB_mex_subassign.c | 112 +- GraphBLAS/Test/GB_mex_subassign_alias.c | 78 + GraphBLAS/Test/GB_mx_get_global.c | 2 +- GraphBLAS/Test/GB_mx_isequal.c | 60 + GraphBLAS/Test/GB_mx_mxArray_to_Matrix.c | 36 +- GraphBLAS/Test/GB_mx_mxArray_to_indices.c | 2 +- GraphBLAS/Test/GB_mx_put_global.c | 7 +- GraphBLAS/Test/GB_mx_same.c | 28 + GraphBLAS/Test/GB_mx_xsame.c | 32 + GraphBLAS/Test/GB_spec_identity.m | 2 +- GraphBLAS/Test/GB_spec_select.m | 2 +- GraphBLAS/Test/Makefile | 9 +- GraphBLAS/Test/gbmake.m | 12 +- GraphBLAS/Test/gg.m | 10 - GraphBLAS/Test/test06.m | 7 +- GraphBLAS/Test/test28.m | 27 + GraphBLAS/Test/test29.m | 47 + GraphBLAS/Test/test52.m | 85 +- GraphBLAS/Test/test69.m | 39 + GraphBLAS/Test/test75.m | 177 + GraphBLAS/Test/testall.m | 12 +- GraphBLAS/Test/testc7.m | 26 +- GraphBLAS/Test/testca.m | 3 +- KLU/Doc/Makefile | 12 +- Makefile | 14 +- README.txt | 6 +- SuiteSparse_config/Makefile | 2 +- SuiteSparse_config/README.txt | 1 + SuiteSparse_config/SuiteSparse_config.h | 8 +- SuiteSparse_config/SuiteSparse_config.mk | 6 +- metis-5.1.0/GKlib/Makefile | 2 +- metis-5.1.0/Makefile | 2 +- ssget/mat/HB/bcsstk20.mat | Bin 14968 -> 0 bytes 1117 files changed, 520384 insertions(+), 2533 deletions(-) delete mode 100644 .gitignore create mode 100644 GraphBLAS/Demo/MATLAB/tri_matlab.m create mode 100644 GraphBLAS/Demo/MATLAB/tri_matlab_out.txt create mode 100644 GraphBLAS/Demo/Output/go3_out_laptop.txt create mode 100644 GraphBLAS/Demo/Output/go_out_cholesky.txt create mode 100644 GraphBLAS/Demo/Output/go_out_laptop.txt create mode 100755 GraphBLAS/Demo/go3 delete mode 100755 GraphBLAS/Demo/t1 delete mode 100755 GraphBLAS/Demo/tdemo create mode 100644 GraphBLAS/Source/GB_AxB_methods.h delete mode 100644 GraphBLAS/Source/GB_queue_init.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_int8.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_first_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_land_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_le_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lor_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_lxor_bool.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__eq_second_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_eq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_first_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_gt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_land_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_int32.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__land_le_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_le_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lor_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_lxor_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_ne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__land_second_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_first_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint32.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_land_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_le_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lor_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_lxor_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lor_second_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp32.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_first_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_land_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lor_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int64.c 
create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_lxor_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__lxor_second_bool.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_div_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_first_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_int16.c create mode 
100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isle_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_islt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_isne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_int16.c create mode 
100644 GraphBLAS/Source/Generated/GB_AxB__max_land_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_land_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_max_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_min_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_int16.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__max_minus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_minus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_plus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_second_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__max_times_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_div_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_fp64.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__min_first_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_first_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isle_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_fp64.c create mode 
100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_islt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_isne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_land_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_fp64.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__min_max_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_max_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_min_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_minus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_plus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_second_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_fp64.c create 
mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__min_times_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_div_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_first_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp32.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_land_uint8.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_max_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_min_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint64.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_second_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__plus_times_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__semirings.h create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_div_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_uint16.c create 
mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_first_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isge_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isle_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_int64.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__times_islt_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_islt_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_isne_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_land_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_int32.c create 
mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_max_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_min_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_minus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_plus_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_fp64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_second_uint8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_fp32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_fp64.c create mode 100644 
GraphBLAS/Source/Generated/GB_AxB__times_times_int16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_int32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_int64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_int8.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_uint16.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_uint32.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_uint64.c create mode 100644 GraphBLAS/Source/Generated/GB_AxB__times_times_uint8.c create mode 100644 GraphBLAS/Source/GxB_stats.c create mode 100644 GraphBLAS/Source/Template/GB_AxB.c create mode 100644 GraphBLAS/Source/Template/GB_AxB.h create mode 100644 GraphBLAS/Source/axb.m create mode 100644 GraphBLAS/Source/axb_compare_template.m create mode 100644 GraphBLAS/Source/axb_method.m create mode 100644 GraphBLAS/Source/axb_template.m delete mode 100644 GraphBLAS/Tcov/log.txt create mode 100644 GraphBLAS/Tcov/log_Dec28.txt create mode 100644 GraphBLAS/Test/GB_mex_assign_alias.c create mode 100644 GraphBLAS/Test/GB_mex_mxm_alias.c create mode 100644 GraphBLAS/Test/GB_mex_subassign_alias.c create mode 100644 GraphBLAS/Test/GB_mx_isequal.c create mode 100644 GraphBLAS/Test/GB_mx_same.c create mode 100644 GraphBLAS/Test/GB_mx_xsame.c delete mode 100644 GraphBLAS/Test/gg.m create mode 100644 GraphBLAS/Test/test28.m create mode 100644 GraphBLAS/Test/test29.m create mode 100644 GraphBLAS/Test/test69.m create mode 100644 GraphBLAS/Test/test75.m delete mode 100644 ssget/mat/HB/bcsstk20.mat diff --git a/.gitignore b/.gitignore deleted file mode 100644 index c739908bbf..0000000000 --- a/.gitignore +++ /dev/null @@ -1,222 +0,0 @@ -# Ignore these files: -*.o -*.so.* -*.so -*.dylib -*.a -*.obj -*.ln -*.bb -*.bbg -*.da -*.tcov -*.gcov -gmon.out -*.bak -*.d -*.gcda -*.gcno -*.aux -*.bbl -*.blg -*.log -*.toc -*.dvi -*.lof -*.lot -*.dll -*.dSYM -my_*.out -*.gcda -*.gcno -*.mex* -*.profile -*.swp -.DS_Store -.nfs* -.pyc - -# ignore these specific programs in the Package/Demo directories -AMD/Demo/amd_demo -AMD/Demo/amd_demo2 -AMD/Demo/amd_l_demo -AMD/Demo/amd_simple -CAMD/Demo/camd_demo -CAMD/Demo/camd_demo2 -CAMD/Demo/camd_l_demo -CAMD/Demo/camd_simple -CCOLAMD/Demo/ccolamd_example -CCOLAMD/Demo/ccolamd_l_example -CHOLMOD/Demo/cholmod_demo -CHOLMOD/Demo/cholmod_l_demo -CHOLMOD/Demo/cholmod_simple -CHOLMOD/Demo/timelog.m -COLAMD/Demo/colamd_example -COLAMD/Demo/colamd_l_example -CSparse/Demo/cs_demo1 -CSparse/Demo/cs_demo2 -CSparse/Demo/cs_demo3 -CXSparse/Demo/cs_ci_demo1 -CXSparse/Demo/cs_ci_demo2 -CXSparse/Demo/cs_ci_demo3 -CXSparse/Demo/cs_cl_demo1 -CXSparse/Demo/cs_cl_demo2 -CXSparse/Demo/cs_cl_demo3 -CXSparse/Demo/cs_demo1 -CXSparse/Demo/cs_demo2 -CXSparse/Demo/cs_demo3 -CXSparse/Demo/cs_di_demo1 -CXSparse/Demo/cs_di_demo2 -CXSparse/Demo/cs_di_demo3 -CXSparse/Demo/cs_dl_demo1 -CXSparse/Demo/cs_dl_demo2 -CXSparse/Demo/cs_dl_demo3 -CXSparse/Demo/cs_idemo -CXSparse/Demo/cs_ldemo -KLU/Demo/klu_simple -KLU/Demo/kludemo -KLU/Demo/kluldemo -LDL/Demo/ldlamd -LDL/Demo/ldllamd -LDL/Demo/ldllmain -LDL/Demo/ldllsimple -LDL/Demo/ldlmain -LDL/Demo/ldlsimple -RBio/Demo/RBdemo -RBio/Demo/temp.rb -SPQR/Demo/qrdemo -SPQR/Demo/qrsimple -SPQR/Demo/qrsimplec -SPQR/Demo/C.mtx -SPQR/Demo/E.txt -SPQR/Demo/R.mtx -SPQR/Demo/X.mtx -SPQR/Demo/gpu_results.txt -SPQR/Demo/qrdemo_gpu -SPQR/Demo/qrdemo_gpu2 -SPQR/Demo/qrdemo_gpu3 -UMFPACK/Demo/numeric.umf -UMFPACK/Demo/symbolic.umf -UMFPACK/Demo/umfpack_di_demo -UMFPACK/Demo/umfpack_dl_demo 
-UMFPACK/Demo/umfpack_simple -UMFPACK/Demo/umfpack_zi_demo -UMFPACK/Demo/umfpack_zl_demo - -# ignore these specific programs in the Package/Tcov directories -CHOLMOD/Tcov/cl -CHOLMOD/Tcov/clread -CHOLMOD/Tcov/cm -CHOLMOD/Tcov/cmread -CHOLMOD/Tcov/covs.out -CHOLMOD/Tcov/ldemo -CHOLMOD/Tcov/ldemo.c -CHOLMOD/Tcov/temp*.mtx -CHOLMOD/Tcov/timelog.m -CHOLMOD/Tcov/l_*.c -CHOLMOD/Tcov/z_*.c -CHOLMOD/Tcov/zz_*.c -CHOLMOD/Tcov/zl_*.c -CHOLMOD/Tcov/zdemo -CHOLMOD/Tcov/zdemo.c - -CSparse/Tcov/cov.out -CSparse/Tcov/cov.sort -CSparse/Tcov/cover.out -CSparse/Tcov/covs.out -CSparse/Tcov/cs_*.c -CSparse/Tcov/cstcov_test -CSparse/Tcov/*.out -CSparse/Tcov/cs_demo1 -CSparse/Tcov/cs_demo2 -CSparse/Tcov/cs_demo3 - -CXSparse/Tcov/cov.out -CXSparse/Tcov/cov.sort -CXSparse/Tcov/cover.out -CXSparse/Tcov/covs.out -CXSparse/Tcov/cs_*.c -CXSparse/Tcov/cs*_ci.c -CXSparse/Tcov/cs*_cl.c -CXSparse/Tcov/cs*_di.c -CXSparse/Tcov/cs*_dl.c -CXSparse/Tcov/*.out -CXSparse/Tcov/cs_demo1_ci -CXSparse/Tcov/cs_demo1_cl -CXSparse/Tcov/cs_demo1_di -CXSparse/Tcov/cs_demo1_dl -CXSparse/Tcov/cs_demo2_ci -CXSparse/Tcov/cs_demo2_cl -CXSparse/Tcov/cs_demo2_di -CXSparse/Tcov/cs_demo2_dl -CXSparse/Tcov/cs_demo3_ci -CXSparse/Tcov/cs_demo3_cl -CXSparse/Tcov/cs_demo3_di -CXSparse/Tcov/cs_demo3_dl -CXSparse/Tcov/cs_idemo -CXSparse/Tcov/cs_ldemo -CXSparse/Tcov/cstcov_test_ci -CXSparse/Tcov/cstcov_test_cl -CXSparse/Tcov/cstcov_test_di -CXSparse/Tcov/cstcov_test_dl - -KLU/Tcov/cov_*.c -KLU/Tcov/klutest -KLU/Tcov/klultest -KLU/Tcov/*.out - -SPQR/Tcov/X.mtx -SPQR/Tcov/gpu_results.txt -SPQR/Tcov/gpuqrengine_demo -SPQR/Tcov/qrdemo_gpu -SPQR/Tcov/qrtest -SPQR/Tcov/qrtest_out.txt -SPQR/Tcov/troll.m -SPQR/Tcov/cov.out - -UMFPACK/Tcov/covall_err.out -UMFPACK/Tcov/cover.out - -# ignore these specific files in the Package/MATLAB directories -MATLAB_Tools/spqr_rank/save_samples_demo_spqr_rank.mat -CXSparse/MATLAB/CSparse/cs_cl_*.c -CXSparse/MATLAB/Test/cs_*.c - -RBio/Tcov/RBdemo -RBio/Tcov/RBdemo.c -RBio/Tcov/RBio.c -RBio/Tcov/RBio.h -RBio/Tcov/RBtest -RBio/Tcov/SuiteSparse_config.c -RBio/Tcov/SuiteSparse_config.h -RBio/Tcov/*.out -RBio/Tcov/*.rb - -# GraphBLAS -GraphBLAS/Demo/bfs_demo.out -GraphBLAS/Demo/complex_demo.m -GraphBLAS/Demo/mis_demo.out -GraphBLAS/Demo/simple_demo.out -GraphBLAS/Demo/wildtype_demo.out -GraphBLAS/Demo/tri_demo.out -GraphBLAS/Test/errlog.txt -GraphBLAS/Test/log.txt -GraphBLAS/Doc/GraphBLAS_UserGuide.out -GraphBLAS/Tcov/cover_GB_2type_template.c -GraphBLAS/Tcov/cover_GB_AxB_compare_template.c -GraphBLAS/Tcov/cover_GB_AxB_factory.c -GraphBLAS/Tcov/cover_GB_AxB_template.c -GraphBLAS/Tcov/cover_GB_assoc_template.c -GraphBLAS/Tcov/cover_GB_ops_template.c -GraphBLAS/Tcov/cover_GB_qsort_template.c -GraphBLAS/Tcov/cover_GB_semiring_template.c -GraphBLAS/Tcov/cover_GB_subref_template.c -GraphBLAS/Tcov/cover_gb.c -GraphBLAS/Tcov/cover_gb_exp.c -GraphBLAS/Tcov/errlog.txt -GraphBLAS/Tcov/log.txt -GraphBLAS/Tcov/gbstat.mat - -# Do not ignore this file -!.gitignore - diff --git a/AMD/Doc/Makefile b/AMD/Doc/Makefile index e90536a289..4769bf1af2 100644 --- a/AMD/Doc/Makefile +++ b/AMD/Doc/Makefile @@ -23,9 +23,9 @@ distclean: clean #------------------------------------------------------------------------------ AMD_UserGuide.pdf: AMD_UserGuide.tex AMD_UserGuide.bib ../Include/amd.h - echo '\begin{verbatim}' > amd_h.tex + echo '\\begin{verbatim}' > amd_h.tex expand -8 ../Include/amd.h >> amd_h.tex - echo '\end{verbatim}' >> amd_h.tex + echo '\\end{verbatim}' >> amd_h.tex pdflatex AMD_UserGuide bibtex AMD_UserGuide pdflatex AMD_UserGuide diff --git 
a/CAMD/Doc/Makefile b/CAMD/Doc/Makefile index 3ce39e3628..8d177e61e2 100644 --- a/CAMD/Doc/Makefile +++ b/CAMD/Doc/Makefile @@ -23,9 +23,9 @@ distclean: clean #------------------------------------------------------------------------------ CAMD_UserGuide.pdf: CAMD_UserGuide.tex CAMD_UserGuide.bib ../Include/camd.h - echo '\begin{verbatim}' > camd_h.tex + echo '\\begin{verbatim}' > camd_h.tex expand -8 ../Include/camd.h >> camd_h.tex - echo '\end{verbatim}' >> camd_h.tex + echo '\\end{verbatim}' >> camd_h.tex pdflatex CAMD_UserGuide bibtex CAMD_UserGuide pdflatex CAMD_UserGuide diff --git a/CSparse/Lib/Makefile b/CSparse/Lib/Makefile index d08abe68df..e3f32cec28 100644 --- a/CSparse/Lib/Makefile +++ b/CSparse/Lib/Makefile @@ -15,8 +15,8 @@ # CSparse/Lib. It does not install it for system-wide usage. LIBRARY = libcsparse +CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O -CF = $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -O -fPIC I = -I../Include RANLIB = ranlib ARCHIVE = $(AR) $(ARFLAGS) diff --git a/ChangeLog b/ChangeLog index 981a53a900..e00a4b3056 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +Dec 23, 2017: SuiteSparse 5.1.2 + + * improved build process for GraphBLAS + * minor change to CSparse/Lib/Makefile, no change in CSparse version + +Dec 17, 2017: SuiteSparse 5.1.1 + + * GraphBLAS added to top-level SuiteSparse/Makefile + * GraphBLAS 1.1.1: bug fix to *assign, split AxB for faster compile, + added memory usage statistics, AxB performance improvement + * minor update to [AMD CAMD KLU]/Doc/Makefile's, no change to + version numbers of AMD, CAMD, or KLU + Dec 1, 2017: SuiteSparse 5.1.0 * GraphBLAS 1.1.0 diff --git a/GraphBLAS/.gitignore b/GraphBLAS/.gitignore index e124ab550c..91917878cf 100644 --- a/GraphBLAS/.gitignore +++ b/GraphBLAS/.gitignore @@ -48,6 +48,7 @@ Test/log.txt Doc/GraphBLAS_UserGuide.log Doc/GraphBLAS_UserGuide.out Tcov/cover_GB_2type_template.c +Tcov/cover_GB_AxB.c Tcov/cover_GB_AxB_compare_template.c Tcov/cover_GB_AxB_factory.c Tcov/cover_GB_AxB_template.c diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index 657696c392..7ab3ac14a6 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -37,6 +37,8 @@ cmake_minimum_required ( VERSION 2.8.12 ) project ( graphblas ) +include ( GNUInstallDirs ) + if ( CMAKE_VERSION VERSION_GREATER "3.0" ) cmake_policy ( SET CMP0042 NEW ) endif ( ) @@ -51,20 +53,20 @@ endif ( ) set ( CMAKE_INCLUDE_CURRENT_DIR ON ) # include directories for both graphblas and graphblasdemo libraries -include_directories ( Source/Template Include Demo/Include ) +include_directories ( Source/Template Source Source/Generated Include Demo/Include ) # check which compiler is being used. If you need to make # compiler-specific modifications, here is the place to do it. if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") # cmake 2.8 workaround: gcc needs to be told to do ANSI C11. # cmake 3.0 doesn't have this problem. - set (CMAKE_C_FLAGS "-std=c11 -lm") + set (CMAKE_C_FLAGS "-std=c11 -lm -fopenmp") if (CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9) message (FATAL_ERROR "gcc version must be at least 4.9") endif ( ) elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") # options for icc: also needs -std=c11 - set (CMAKE_C_FLAGS "-std=c11 -fopenmp") + set (CMAKE_C_FLAGS "-std=c11 -fopenmp") if (CMAKE_C_COMPILER_VERSION VERSION_LESS 18.0) message (FATAL_ERROR "icc version must be at least 18.0") endif ( ) @@ -77,19 +79,51 @@ elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") # options for MicroSoft Visual Studio endif ( ) -# create the graphblas library.
Requires ANSI C11 -file ( GLOB GRAPHBLAS_SOURCES "Source/*.c" ) +# create the dynamic graphblas library. Requires ANSI C11 +file ( GLOB GRAPHBLAS_SOURCES "Source/*.c" "Source/Generated/*.c" ) add_library ( graphblas SHARED ${GRAPHBLAS_SOURCES} ) -SET_TARGET_PROPERTIES ( graphblas PROPERTIES VERSION 1.1.0 +SET_TARGET_PROPERTIES ( graphblas PROPERTIES VERSION 1.1.2 SOVERSION 1 C_STANDARD_REQUIRED 11 PUBLIC_HEADER "Include/GraphBLAS.h" ) set_property ( TARGET graphblas PROPERTY C_STANDARD 11 ) +# create the static graphblas library. Requires ANSI C11 +add_library ( graphblas_static STATIC ${GRAPHBLAS_SOURCES} ) +SET_TARGET_PROPERTIES ( graphblas_static PROPERTIES VERSION 1.1.2 + OUTPUT_NAME graphblas + POSITION_INDEPENDENT_CODE OFF + SOVERSION 1 + C_STANDARD_REQUIRED 11 + PUBLIC_HEADER "Include/GraphBLAS.h" ) +set_property ( TARGET graphblas_static PROPERTY C_STANDARD 11 ) + +# Notes from Sebastien Villemot (sebastien@debian.org): +# SOVERSION policy: if a binary compiled against the old version of the shared +# library needs recompiling in order to work with the new version, then a +# SO_VERSION increase # is needed. Otherwise not. Examples of the changes that +# require a SO_VERSION increase: +# +# - a public function or static variable is removed +# - the prototype of a public function changes +# - the integer value attached to a public #define or enum changes +# - the fields of a public structure are modified +# +# Examples of changes that do not require a SO_VERSION increase: +# +# - a new public function or static variable is added +# - a private function or static variable is removed or modified +# - changes in the internals of a structure that is opaque to the calling +# program (i.e. is only a pointer manipulated through public functions of +# the library) +# - a public enum is extended (by adding a new item at the end, but without +# changing the already existing items) + # graphblas installation location -install ( TARGETS graphblas - LIBRARY DESTINATION /usr/local/lib - PUBLIC_HEADER DESTINATION /usr/local/include ) +install ( TARGETS graphblas graphblas_static + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) # Demo library file ( GLOB DEMO_SOURCES "Demo/Source/*.c" ) @@ -98,7 +132,13 @@ SET_TARGET_PROPERTIES ( graphblasdemo PROPERTIES C_STANDARD_REQUIRED 11 ) set_property ( TARGET graphblasdemo PROPERTY C_STANDARD 11 ) -target_link_libraries ( graphblasdemo graphblas ) +add_library ( graphblasdemo_static STATIC ${DEMO_SOURCES} ) +SET_TARGET_PROPERTIES ( graphblasdemo_static PROPERTIES + C_STANDARD_REQUIRED 11 ) +set_property ( TARGET graphblasdemo_static PROPERTY C_STANDARD 11 ) + +target_link_libraries ( graphblasdemo m graphblas ) +target_link_libraries ( graphblasdemo_static graphblas_static ) # Demo programs add_executable ( bfs_demo "Demo/Program/bfs_demo.c" ) diff --git a/GraphBLAS/Demo/Include/demos.h b/GraphBLAS/Demo/Include/demos.h index 9d7ab72ac8..0b9473faa2 100644 --- a/GraphBLAS/Demo/Include/demos.h +++ b/GraphBLAS/Demo/Include/demos.h @@ -53,7 +53,8 @@ GrB_Info read_matrix // read a double-precision matrix FILE *f, // file to read the tuples from bool make_symmetric, // if true, return A as symmetric bool no_self_edges, // if true, then remove self edges from A - bool one_based // if true, input matrix is 1-based + bool one_based, // if true, input matrix is 1-based + bool boolean // if true, input is GrB_BOOL, otherwise GrB_FP64 ) ; GrB_Info mis // compute a maximal 
independent set @@ -91,7 +92,8 @@ GrB_Info get_matrix // get a matrix from stdin, or create random one GrB_Matrix *A_output, // matrix to create int argc, // command-line arguments char **argv, - bool no_self_edges // if true, ensure the matrix has no self-edges + bool no_self_edges, // if true, ensure the matrix has no self-edges + bool boolean // if true, file is read as GrB_BOOL, else GrB_FP64 ) ; GrB_Info wathen // construct a random Wathen matrix diff --git a/GraphBLAS/Demo/MATLAB/tri_matlab.m b/GraphBLAS/Demo/MATLAB/tri_matlab.m new file mode 100644 index 0000000000..d405d1a8fc --- /dev/null +++ b/GraphBLAS/Demo/MATLAB/tri_matlab.m @@ -0,0 +1,106 @@ +clear +clear +diary tri_matlab_out.txt + +files = { +'/research/davisgroup/GraphChallenge/ssget/Mallya/lhr71_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/ssget/Freescale/Freescale2_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/cit-HepPh/cit-HepPh_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/cit-HepTh/cit-HepTh_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/email-EuAll/email-EuAll_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/soc-Epinions1/soc-Epinions1_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/soc-Slashdot0811/soc-Slashdot0811_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/soc-Slashdot0902/soc-Slashdot0902_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/amazon0312/amazon0312_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/amazon0505/amazon0505_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/amazon0601/amazon0601_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/flickrEdges/flickrEdges_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/cit-Patents/cit-Patents_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/ssget/SNAP/soc-LiveJournal1_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/ssget/Gleich/wb-edu_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/amazon0302/amazon0302_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/as-caida20071105/as-caida20071105_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/as20000102/as20000102_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/ca-AstroPh/ca-AstroPh_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/ca-CondMat/ca-CondMat_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/ca-GrQc/ca-GrQc_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/ca-HepPh/ca-HepPh_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/ca-HepTh/ca-HepTh_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/email-Enron/email-Enron_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/facebook_combined/facebook_combined_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/loc-brightkite_edges/loc-brightkite_edges_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/loc-gowalla_edges/loc-gowalla_edges_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010331/oregon1_010331_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010407/oregon1_010407_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010414/oregon1_010414_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010421/oregon1_010421_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010428/oregon1_010428_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010505/oregon1_010505_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010512/oregon1_010512_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon1_010519/oregon1_010519_adj.tsv.gz' 
+'/research/davisgroup/GraphChallenge/snap/oregon1_010526/oregon1_010526_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010331/oregon2_010331_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010407/oregon2_010407_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010414/oregon2_010414_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010421/oregon2_010421_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010428/oregon2_010428_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010505/oregon2_010505_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010512/oregon2_010512_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010519/oregon2_010519_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/oregon2_010526/oregon2_010526_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella04/p2p-Gnutella04_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella05/p2p-Gnutella05_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella06/p2p-Gnutella06_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella08/p2p-Gnutella08_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella09/p2p-Gnutella09_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella24/p2p-Gnutella24_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella25/p2p-Gnutella25_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella30/p2p-Gnutella30_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/p2p-Gnutella31/p2p-Gnutella31_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/roadNet-CA/roadNet-CA_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/roadNet-PA/roadNet-PA_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/snap/roadNet-TX/roadNet-TX_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/image-grid/g-1045506-262144_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/image-grid/g-16764930-4194304_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/image-grid/g-260610-65536_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/image-grid/g-268386306-67108864_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/image-grid/g-4188162-1048576_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/ssget/DIMACS10/hugebubbles-00020_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/ssget/vanHeukelum/cage15_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/graph500-scale18-ef16/graph500-scale18-ef16_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/graph500-scale19-ef16/graph500-scale19-ef16_adj.tsv.gz'} ; + +% out of memory (kills MATLAB on a 16GB laptop): +files = { +'/research/davisgroup/GraphChallenge/ssget/Freescale/circuit5M_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/graph500-scale20-ef16/graph500-scale20-ef16_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/graph500-scale21-ef16/graph500-scale21-ef16_adj.tsv.gz' +'/research/davisgroup/GraphChallenge/synthetic/graph500-scale22-ef16/graph500-scale22-ef16_adj.tsv.gz' } ; + +nfiles = length (files) ; +for k = 1:nfiles + filename = files {k} ; + fprintf ('\nMatrix: %s\n', filename) ; + system (sprintf ('gunzip -c %s > /tmp/adj', filename)) ; + + A = spconvert (load ('/tmp/adj')) ; + + % [ntri t] = tricount ('Sandia', A) ; + tic + U = triu (A, 1) ; + tprep = toc ; + clear A + tic + ntri = sum (sum (U*U .*U)) ; + ttri = toc ; + + ttot = tprep + ttri ; + + fprintf ('triangles %d prep %g tri %g total %g rate 
%g\n', ... + full (ntri), tprep, ttri, ttot, 1e-6*nnz(U)/ttot) ; + + clear U + diary off + diary on +end + diff --git a/GraphBLAS/Demo/MATLAB/tri_matlab_out.txt b/GraphBLAS/Demo/MATLAB/tri_matlab_out.txt new file mode 100644 index 0000000000..70c8873581 --- /dev/null +++ b/GraphBLAS/Demo/MATLAB/tri_matlab_out.txt @@ -0,0 +1,198 @@ + +Matrix: /research/davisgroup/GraphChallenge/ssget/Mallya/lhr71_adj.tsv.gz +triangles 160592 prep 0.0143872 tri 0.237386 total 0.251773 rate 5.92913 + +Matrix: /research/davisgroup/GraphChallenge/ssget/Freescale/Freescale2_adj.tsv.gz +triangles 21027280 prep 0.107127 tri 0.634098 total 0.741226 rate 7.75059 + +Matrix: /research/davisgroup/GraphChallenge/snap/cit-HepPh/cit-HepPh_adj.tsv.gz +triangles 1276868 prep 0.00447468 tri 0.358224 total 0.362699 rate 1.1604 + +Matrix: /research/davisgroup/GraphChallenge/snap/cit-HepTh/cit-HepTh_adj.tsv.gz +triangles 1478735 prep 0.00298542 tri 0.411518 total 0.414504 rate 0.849896 + +Matrix: /research/davisgroup/GraphChallenge/snap/email-EuAll/email-EuAll_adj.tsv.gz +triangles 267313 prep 0.00906737 tri 1.25468 total 1.26375 rate 0.288412 + +Matrix: /research/davisgroup/GraphChallenge/snap/soc-Epinions1/soc-Epinions1_adj.tsv.gz +triangles 1624481 prep 0.00428148 tri 0.777762 total 0.782043 rate 0.518821 + +Matrix: /research/davisgroup/GraphChallenge/snap/soc-Slashdot0811/soc-Slashdot0811_adj.tsv.gz +triangles 551724 prep 0.00615945 tri 0.983542 total 0.989701 rate 0.474062 + +Matrix: /research/davisgroup/GraphChallenge/snap/soc-Slashdot0902/soc-Slashdot0902_adj.tsv.gz +triangles 602592 prep 0.00680812 tri 0.978685 total 0.985493 rate 0.511652 + +Matrix: /research/davisgroup/GraphChallenge/snap/amazon0312/amazon0312_adj.tsv.gz +triangles 3686467 prep 0.0318552 tri 1.25298 total 1.28484 rate 1.82893 + +Matrix: /research/davisgroup/GraphChallenge/snap/amazon0505/amazon0505_adj.tsv.gz +triangles 3951063 prep 0.0321268 tri 1.14816 total 1.18029 rate 2.06681 + +Matrix: /research/davisgroup/GraphChallenge/snap/amazon0601/amazon0601_adj.tsv.gz +triangles 3986507 prep 0.0283822 tri 0.989228 total 1.01761 rate 2.40112 + +Matrix: /research/davisgroup/GraphChallenge/snap/flickrEdges/flickrEdges_adj.tsv.gz +triangles 107987357 prep 0.0234496 tri 4.77951 total 4.80296 rate 0.4824 + +Matrix: /research/davisgroup/GraphChallenge/snap/cit-Patents/cit-Patents_adj.tsv.gz +triangles 7515023 prep 0.249611 tri 10.7758 total 11.0255 rate 1.49826 + +Matrix: /research/davisgroup/GraphChallenge/ssget/SNAP/soc-LiveJournal1_adj.tsv.gz +triangles 285730264 prep 0.613591 tri 105.862 total 106.476 rate 0.402449 + +Matrix: /research/davisgroup/GraphChallenge/ssget/Gleich/wb-edu_adj.tsv.gz +triangles 254718147 prep 0.664859 tri 66.9712 total 67.6361 rate 0.683601 + +Matrix: /research/davisgroup/GraphChallenge/snap/amazon0302/amazon0302_adj.tsv.gz +triangles 717719 prep 0.0140822 tri 0.230047 total 0.244129 rate 3.68572 + +Matrix: /research/davisgroup/GraphChallenge/snap/as-caida20071105/as-caida20071105_adj.tsv.gz +triangles 36365 prep 0.0010467 tri 0.108408 total 0.109454 rate 0.487701 + +Matrix: /research/davisgroup/GraphChallenge/snap/as20000102/as20000102_adj.tsv.gz +triangles 6584 prep 0.00537723 tri 0.025047 total 0.0304242 rate 0.413223 + +Matrix: /research/davisgroup/GraphChallenge/snap/ca-AstroPh/ca-AstroPh_adj.tsv.gz +triangles 1351441 prep 0.00133915 tri 0.127284 total 0.128623 rate 1.53977 + +Matrix: /research/davisgroup/GraphChallenge/snap/ca-CondMat/ca-CondMat_adj.tsv.gz +triangles 173361 prep 0.000961928 tri 0.0218009 total 0.0227628 rate 
4.1049 + +Matrix: /research/davisgroup/GraphChallenge/snap/ca-GrQc/ca-GrQc_adj.tsv.gz +triangles 48260 prep 0.000192834 tri 0.00381992 total 0.00401275 rate 3.60949 + +Matrix: /research/davisgroup/GraphChallenge/snap/ca-HepPh/ca-HepPh_adj.tsv.gz +triangles 3358499 prep 0.000943791 tri 0.0677774 total 0.0687212 rate 1.7242 + +Matrix: /research/davisgroup/GraphChallenge/snap/ca-HepTh/ca-HepTh_adj.tsv.gz +triangles 28339 prep 0.000349599 tri 0.00517511 total 0.00552471 rate 4.70124 + +Matrix: /research/davisgroup/GraphChallenge/snap/email-Enron/email-Enron_adj.tsv.gz +triangles 727044 prep 0.0015489 tri 0.219827 total 0.221376 rate 0.830402 + +Matrix: /research/davisgroup/GraphChallenge/snap/facebook_combined/facebook_combined_adj.tsv.gz +triangles 1612010 prep 0.000524091 tri 0.0261147 total 0.0266388 rate 3.31223 + +Matrix: /research/davisgroup/GraphChallenge/snap/loc-brightkite_edges/loc-brightkite_edges_adj.tsv.gz +triangles 494728 prep 0.00281131 tri 0.11863 total 0.121441 rate 1.76281 + +Matrix: /research/davisgroup/GraphChallenge/snap/loc-gowalla_edges/loc-gowalla_edges_adj.tsv.gz +triangles 2273138 prep 0.0132991 tri 3.68387 total 3.69717 rate 0.257042 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010331/oregon1_010331_adj.tsv.gz +triangles 17144 prep 0.00138479 tri 0.0380629 total 0.0394476 rate 0.557752 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010407/oregon1_010407_adj.tsv.gz +triangles 15834 prep 0.000259382 tri 0.0383753 total 0.0386347 rate 0.56941 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010414/oregon1_010414_adj.tsv.gz +triangles 18237 prep 0.0110612 tri 0.0421322 total 0.0531935 rate 0.422402 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010421/oregon1_010421_adj.tsv.gz +triangles 19108 prep 0.000252379 tri 0.0403147 total 0.0405671 rate 0.560725 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010428/oregon1_010428_adj.tsv.gz +triangles 17645 prep 0.000289668 tri 0.039345 total 0.0396347 rate 0.567508 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010505/oregon1_010505_adj.tsv.gz +triangles 17597 prep 0.000290858 tri 0.0403442 total 0.0406351 rate 0.556342 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010512/oregon1_010512_adj.tsv.gz +triangles 17598 prep 0.000266996 tri 0.0500232 total 0.0502902 rate 0.450923 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010519/oregon1_010519_adj.tsv.gz +triangles 17677 prep 0.00026798 tri 0.0427906 total 0.0430586 rate 0.527746 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon1_010526/oregon1_010526_adj.tsv.gz +triangles 19894 prep 0.000269287 tri 0.0406778 total 0.0409471 rate 0.571689 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010331/oregon2_010331_adj.tsv.gz +triangles 82856 prep 0.00142282 tri 0.0483695 total 0.0497923 rate 0.626201 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010407/oregon2_010407_adj.tsv.gz +triangles 78138 prep 0.00138681 tri 0.050013 total 0.0513998 rate 0.600294 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010414/oregon2_010414_adj.tsv.gz +triangles 88905 prep 0.000386018 tri 0.0496562 total 0.0500422 rate 0.634684 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010421/oregon2_010421_adj.tsv.gz +triangles 82129 prep 0.000308181 tri 0.0498343 total 0.0501425 rate 0.628967 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010428/oregon2_010428_adj.tsv.gz +triangles 78000 prep 0.000400167 tri 0.0485411 total 0.0489413 rate 
0.64228 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010505/oregon2_010505_adj.tsv.gz +triangles 72182 prep 0.00555591 tri 0.0535838 total 0.0591397 rate 0.523218 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010512/oregon2_010512_adj.tsv.gz +triangles 72866 prep 0.000405702 tri 0.0596244 total 0.0600301 rate 0.521455 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010519/oregon2_010519_adj.tsv.gz +triangles 83709 prep 0.000408464 tri 0.0619384 total 0.0623469 rate 0.517861 + +Matrix: /research/davisgroup/GraphChallenge/snap/oregon2_010526/oregon2_010526_adj.tsv.gz +triangles 89541 prep 0.00042159 tri 0.0631827 total 0.0636043 rate 0.514588 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella04/p2p-Gnutella04_adj.tsv.gz +triangles 934 prep 0.000497533 tri 0.00672949 total 0.00722702 rate 5.53395 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella05/p2p-Gnutella05_adj.tsv.gz +triangles 1112 prep 0.000394091 tri 0.00542901 total 0.0058231 rate 5.46771 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella06/p2p-Gnutella06_adj.tsv.gz +triangles 1142 prep 0.000335403 tri 0.00519042 total 0.00552583 rate 5.70503 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella08/p2p-Gnutella08_adj.tsv.gz +triangles 2383 prep 0.00679597 tri 0.00332949 total 0.0101255 rate 2.05196 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella09/p2p-Gnutella09_adj.tsv.gz +triangles 2354 prep 0.00023195 tri 0.0037709 total 0.00400285 rate 6.49862 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella24/p2p-Gnutella24_adj.tsv.gz +triangles 986 prep 0.000947353 tri 0.0119743 total 0.0129217 rate 5.05885 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella25/p2p-Gnutella25_adj.tsv.gz +triangles 806 prep 0.000687484 tri 0.0055359 total 0.00622338 rate 8.79024 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella30/p2p-Gnutella30_adj.tsv.gz +triangles 1590 prep 0.00143357 tri 0.0146289 total 0.0160624 rate 5.49904 + +Matrix: /research/davisgroup/GraphChallenge/snap/p2p-Gnutella31/p2p-Gnutella31_adj.tsv.gz +triangles 2024 prep 0.00264621 tri 0.0208612 total 0.0235074 rate 6.2913 + +Matrix: /research/davisgroup/GraphChallenge/snap/roadNet-CA/roadNet-CA_adj.tsv.gz +triangles 120676 prep 0.0736232 tri 0.217636 total 0.291259 rate 9.49877 + +Matrix: /research/davisgroup/GraphChallenge/snap/roadNet-PA/roadNet-PA_adj.tsv.gz +triangles 67150 prep 0.0368046 tri 0.120113 total 0.156917 rate 9.82617 + +Matrix: /research/davisgroup/GraphChallenge/snap/roadNet-TX/roadNet-TX_adj.tsv.gz +triangles 82869 prep 0.0470903 tri 0.156379 total 0.203469 rate 9.44448 + +Matrix: /research/davisgroup/GraphChallenge/synthetic/image-grid/g-1045506-262144_adj.tsv.gz +triangles 1044484 prep 0.0108022 tri 0.078203 total 0.0890052 rate 11.7466 + +Matrix: /research/davisgroup/GraphChallenge/synthetic/image-grid/g-16764930-4194304_adj.tsv.gz +triangles 16760836 prep 0.20142 tri 0.989628 total 1.19105 rate 14.0758 + +Matrix: /research/davisgroup/GraphChallenge/synthetic/image-grid/g-260610-65536_adj.tsv.gz +triangles 260100 prep 0.00280764 tri 0.0211592 total 0.0239668 rate 10.8738 + +Matrix: /research/davisgroup/GraphChallenge/synthetic/image-grid/g-268386306-67108864_adj.tsv.gz +triangles 268369924 prep 12.55 tri 51.8101 total 64.3601 rate 4.17007 + +Matrix: /research/davisgroup/GraphChallenge/synthetic/image-grid/g-4188162-1048576_adj.tsv.gz +triangles 4186116 prep 0.0496269 tri 0.258787 total 0.308413 rate 13.5797 + +Matrix: 
/research/davisgroup/GraphChallenge/ssget/DIMACS10/hugebubbles-00020_adj.tsv.gz +triangles 0 prep 0.718285 tri 6.68752 total 7.40581 rate 4.2926 + +Matrix: /research/davisgroup/GraphChallenge/ssget/vanHeukelum/cage15_adj.tsv.gz +triangles 36106416 prep 0.631395 tri 9.55582 total 10.1872 rate 4.61582 + +Matrix: /research/davisgroup/GraphChallenge/synthetic/graph500-scale18-ef16/graph500-scale18-ef16_adj.tsv.gz +triangles 82287285 prep 0.0409373 tri 58.6388 total 58.6797 rate 0.0647643 + +Matrix: /research/davisgroup/GraphChallenge/synthetic/graph500-scale19-ef16/graph500-scale19-ef16_adj.tsv.gz +triangles 186288972 prep 0.075334 tri 194.017 total 194.092 rate 0.0398248 diff --git a/GraphBLAS/Demo/Output/bfs_demo.out b/GraphBLAS/Demo/Output/bfs_demo.out index c6199319d6..13c80e3b9f 100644 --- a/GraphBLAS/Demo/Output/bfs_demo.out +++ b/GraphBLAS/Demo/Output/bfs_demo.out @@ -1,28 +1,28 @@ Wathen: nx 4 ny 4 n 65 nz 817 method 0, time: 0.000 sec method 5: vector assign and reduce: -BFS time in seconds: 0.000111 +BFS time in seconds: 0.000131 nodes reachable from node 0: 65 out of 65 max BFS level: 5 method 5: same but check each result -BFS time in seconds: 0.000054 +BFS time in seconds: 0.000046 nodes reachable from node 0: 65 out of 65 max BFS level: 5 method 6: apply unary operator -BFS time in seconds: 0.000076 +BFS time in seconds: 0.000067 nodes reachable from node 0: 65 out of 65 max BFS level: 5 method 6: same but check each result -BFS time in seconds: 0.000062 +BFS time in seconds: 0.000070 nodes reachable from node 0: 65 out of 65 max BFS level: 5 random 5 by 5, nz: 23, method 1 time 0.000 sec method 5: vector assign and reduce: -BFS time in seconds: 0.000079 +BFS time in seconds: 0.000110 nodes reachable from node 0: 5 out of 5 max BFS level: 2 @@ -32,68 +32,68 @@ nodes reachable from node 0: 5 out of 5 max BFS level: 2 method 6: apply unary operator -BFS time in seconds: 0.000024 +BFS time in seconds: 0.000034 nodes reachable from node 0: 5 out of 5 max BFS level: 2 method 6: same but check each result -BFS time in seconds: 0.000011 +BFS time in seconds: 0.000018 nodes reachable from node 0: 5 out of 5 max BFS level: 2 ntuples: 3 nrows 3 ncols 3 -time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000027 +time to prune self-edges: 0.000000 +time to build the graph with GrB_Matrix_build: 0.000034 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000067 +A = (C+C')/2 time 0.000026 matrix 3 by 3, 3 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000064 +BFS time in seconds: 0.000075 nodes reachable from node 0: 1 out of 3 max BFS level: 1 method 5: same but check each result -BFS time in seconds: 0.000037 +BFS time in seconds: 0.000016 nodes reachable from node 0: 1 out of 3 max BFS level: 1 method 6: apply unary operator -BFS time in seconds: 0.000028 +BFS time in seconds: 0.000024 nodes reachable from node 0: 1 out of 3 max BFS level: 1 method 6: same but check each result -BFS time in seconds: 0.000012 +BFS time in seconds: 0.000013 nodes reachable from node 0: 1 out of 3 max BFS level: 1 ntuples: 8 nrows 4 ncols 4 -time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000034 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000046 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000029 +A = (C+C')/2 time 0.000026 matrix 4 by 4, 8 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000051 +BFS time in seconds: 0.000069 nodes 
reachable from node 0: 2 out of 4 max BFS level: 2 method 5: same but check each result -BFS time in seconds: 0.000011 +BFS time in seconds: 0.000019 nodes reachable from node 0: 2 out of 4 max BFS level: 2 method 6: apply unary operator -BFS time in seconds: 0.000013 +BFS time in seconds: 0.000035 nodes reachable from node 0: 2 out of 4 max BFS level: 2 method 6: same but check each result -BFS time in seconds: 0.000009 +BFS time in seconds: 0.000024 nodes reachable from node 0: 2 out of 4 max BFS level: 2 @@ -112,7 +112,7 @@ must be identical to monoid operator z=or(x,y) of type [bool] ================================================================= SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. http://suitesparse.com Dept of Computer Sci. & Eng, Texas A&M University -SuiteSparse:GraphBLAS version: 1.1.0 Date: Dec 1, 2017 +SuiteSparse:GraphBLAS version: 1.1.2 Date: Dec 28, 2017 SuiteSparse:GraphBLAS, Copyright 2017, Timothy A. Davis Licensed under the Apache License, Version 2.0 (the "License"); @@ -137,29 +137,29 @@ GraphBLAS status: GrB_SUCCESS ntuples: 10 nrows 4 ncols 4 time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000023 +time to build the graph with GrB_Matrix_build: 0.000040 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000029 +A = (C+C')/2 time 0.000033 matrix 4 by 4, 14 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000049 +BFS time in seconds: 0.000081 nodes reachable from node 0: 4 out of 4 max BFS level: 2 method 5: same but check each result -BFS time in seconds: 0.000019 +BFS time in seconds: 0.000018 nodes reachable from node 0: 4 out of 4 max BFS level: 2 method 6: apply unary operator -BFS time in seconds: 0.000013 +BFS time in seconds: 0.000030 nodes reachable from node 0: 4 out of 4 max BFS level: 2 method 6: same but check each result -BFS time in seconds: 0.000010 +BFS time in seconds: 0.000017 nodes reachable from node 0: 4 out of 4 max BFS level: 2 @@ -178,7 +178,7 @@ must be identical to monoid operator z=or(x,y) of type [bool] ================================================================= SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. http://suitesparse.com Dept of Computer Sci. & Eng, Texas A&M University -SuiteSparse:GraphBLAS version: 1.1.0 Date: Dec 1, 2017 +SuiteSparse:GraphBLAS version: 1.1.2 Date: Dec 28, 2017 SuiteSparse:GraphBLAS, Copyright 2017, Timothy A. 
Davis Licensed under the Apache License, Version 2.0 (the "License"); @@ -202,20 +202,20 @@ GraphBLAS status: GrB_SUCCESS ntuples: 8 nrows 4 ncols 3 -time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000041 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000039 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000030 +time to construct augmented system: 0.000039 matrix 7 by 7, 16 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000068 +BFS time in seconds: 0.000086 nodes reachable from node 0: 7 out of 7 max BFS level: 5 method 5: same but check each result -BFS time in seconds: 0.000032 +BFS time in seconds: 0.000031 nodes reachable from node 0: 7 out of 7 max BFS level: 5 @@ -225,188 +225,188 @@ nodes reachable from node 0: 7 out of 7 max BFS level: 5 method 6: same but check each result -BFS time in seconds: 0.000038 +BFS time in seconds: 0.000032 nodes reachable from node 0: 7 out of 7 max BFS level: 5 ntuples: 438 nrows 219 ncols 85 -time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000033 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000050 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000058 +time to construct augmented system: 0.000086 matrix 304 by 304, 876 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000124 +BFS time in seconds: 0.000187 nodes reachable from node 0: 304 out of 304 max BFS level: 17 method 5: same but check each result -BFS time in seconds: 0.000097 +BFS time in seconds: 0.000135 nodes reachable from node 0: 304 out of 304 max BFS level: 17 method 6: apply unary operator -BFS time in seconds: 0.000122 +BFS time in seconds: 0.000174 nodes reachable from node 0: 304 out of 304 max BFS level: 17 method 6: same but check each result -BFS time in seconds: 0.000111 +BFS time in seconds: 0.000164 nodes reachable from node 0: 304 out of 304 max BFS level: 17 ntuples: 224 nrows 48 ncols 48 -time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000040 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000046 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000054 +A = (C+C')/2 time 0.000041 matrix 48 by 48, 400 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000072 +BFS time in seconds: 0.000096 nodes reachable from node 0: 48 out of 48 max BFS level: 4 method 5: same but check each result -BFS time in seconds: 0.000062 +BFS time in seconds: 0.000037 nodes reachable from node 0: 48 out of 48 max BFS level: 4 method 6: apply unary operator -BFS time in seconds: 0.000094 +BFS time in seconds: 0.000055 nodes reachable from node 0: 48 out of 48 max BFS level: 4 method 6: same but check each result -BFS time in seconds: 0.000060 +BFS time in seconds: 0.000041 nodes reachable from node 0: 48 out of 48 max BFS level: 4 ntuples: 147631 nrows 4884 ncols 4884 -time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.002670 +time to prune self-edges: 0.000003 +time to build the graph with GrB_Matrix_build: 0.002080 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.009048 +A = (C+C')/2 time 0.002319 matrix 4884 by 4884, 290378 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.004482 +BFS time in seconds: 0.001543 nodes reachable from node 0: 
4810 out of 4884 max BFS level: 44 method 5: same but check each result -BFS time in seconds: 0.007952 +BFS time in seconds: 0.001528 nodes reachable from node 0: 4810 out of 4884 max BFS level: 44 method 6: apply unary operator -BFS time in seconds: 0.005607 +BFS time in seconds: 0.002379 nodes reachable from node 0: 4810 out of 4884 max BFS level: 44 method 6: same but check each result -BFS time in seconds: 0.004338 +BFS time in seconds: 0.001866 nodes reachable from node 0: 4810 out of 4884 max BFS level: 44 ntuples: 1069 nrows 183 ncols 183 time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000043 +time to build the graph with GrB_Matrix_build: 0.000069 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000064 +A = (C+C')/2 time 0.000069 matrix 183 by 183, 1585 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000124 +BFS time in seconds: 0.000148 nodes reachable from node 0: 183 out of 183 max BFS level: 4 method 5: same but check each result -BFS time in seconds: 0.000097 +BFS time in seconds: 0.000076 nodes reachable from node 0: 183 out of 183 max BFS level: 4 method 6: apply unary operator -BFS time in seconds: 0.000118 +BFS time in seconds: 0.000087 nodes reachable from node 0: 183 out of 183 max BFS level: 4 method 6: same but check each result -BFS time in seconds: 0.000092 +BFS time in seconds: 0.000073 nodes reachable from node 0: 183 out of 183 max BFS level: 4 ntuples: 123 nrows 32 ncols 31 time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000045 +time to build the graph with GrB_Matrix_build: 0.000041 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000049 +time to construct augmented system: 0.000053 matrix 63 by 63, 246 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000125 +BFS time in seconds: 0.000114 nodes reachable from node 0: 63 out of 63 max BFS level: 6 method 5: same but check each result -BFS time in seconds: 0.000055 +BFS time in seconds: 0.000052 nodes reachable from node 0: 63 out of 63 max BFS level: 6 method 6: apply unary operator -BFS time in seconds: 0.000075 +BFS time in seconds: 0.000090 nodes reachable from node 0: 63 out of 63 max BFS level: 6 method 6: same but check each result -BFS time in seconds: 0.000058 +BFS time in seconds: 0.000059 nodes reachable from node 0: 63 out of 63 max BFS level: 6 ntuples: 123 nrows 31 ncols 32 time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000025 +time to build the graph with GrB_Matrix_build: 0.000049 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000032 +time to construct augmented system: 0.000051 matrix 63 by 63, 246 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000062 +BFS time in seconds: 0.000109 nodes reachable from node 0: 63 out of 63 max BFS level: 6 method 5: same but check each result -BFS time in seconds: 0.000031 +BFS time in seconds: 0.000047 nodes reachable from node 0: 63 out of 63 max BFS level: 6 method 6: apply unary operator -BFS time in seconds: 0.000043 +BFS time in seconds: 0.000069 nodes reachable from node 0: 63 out of 63 max BFS level: 6 method 6: same but check each result -BFS time in seconds: 0.000035 +BFS time in seconds: 0.000051 nodes reachable from node 0: 63 out of 63 max BFS level: 6 ntuples: 102 nrows 27 ncols 51 time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 
0.000034 +time to build the graph with GrB_Matrix_build: 0.000041 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000042 +time to construct augmented system: 0.000046 matrix 78 by 78, 204 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000109 +BFS time in seconds: 0.000122 nodes reachable from node 0: 78 out of 78 max BFS level: 10 @@ -421,167 +421,167 @@ nodes reachable from node 0: 78 out of 78 max BFS level: 10 method 6: same but check each result -BFS time in seconds: 0.000078 +BFS time in seconds: 0.000084 nodes reachable from node 0: 78 out of 78 max BFS level: 10 ntuples: 49920 nrows 492 ncols 490 -time to prune self-edges: 0.000003 -time to build the graph with GrB_Matrix_build: 0.001196 +time to prune self-edges: 0.000002 +time to build the graph with GrB_Matrix_build: 0.000581 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.005455 +time to construct augmented system: 0.005522 matrix 982 by 982, 99840 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.001255 +BFS time in seconds: 0.000744 nodes reachable from node 0: 933 out of 982 max BFS level: 5 method 5: same but check each result -BFS time in seconds: 0.001193 +BFS time in seconds: 0.000621 nodes reachable from node 0: 933 out of 982 max BFS level: 5 method 6: apply unary operator -BFS time in seconds: 0.001154 +BFS time in seconds: 0.000422 nodes reachable from node 0: 933 out of 982 max BFS level: 5 method 6: same but check each result -BFS time in seconds: 0.001123 +BFS time in seconds: 0.000353 nodes reachable from node 0: 933 out of 982 max BFS level: 5 ntuples: 299 nrows 67 ncols 67 -time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000049 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000053 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000039 +A = (C+C')/2 time 0.000032 matrix 67 by 67, 576 entries, from stdin method 5: vector assign and reduce: -BFS time in seconds: 0.000095 +BFS time in seconds: 0.000107 nodes reachable from node 0: 67 out of 67 max BFS level: 5 method 5: same but check each result -BFS time in seconds: 0.000057 +BFS time in seconds: 0.000035 nodes reachable from node 0: 67 out of 67 max BFS level: 5 method 6: apply unary operator -BFS time in seconds: 0.000063 +BFS time in seconds: 0.000082 nodes reachable from node 0: 67 out of 67 max BFS level: 5 method 6: same but check each result -BFS time in seconds: 0.000061 +BFS time in seconds: 0.000060 nodes reachable from node 0: 67 out of 67 max BFS level: 5 -Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.303 sec +Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.316 sec method 5: vector assign and reduce: -BFS time in seconds: 0.047081 +BFS time in seconds: 0.027409 nodes reachable from node 0: 120801 out of 120801 max BFS level: 201 method 5: same but check each result -BFS time in seconds: 0.043068 +BFS time in seconds: 0.033958 nodes reachable from node 0: 120801 out of 120801 max BFS level: 201 method 6: apply unary operator -BFS time in seconds: 0.094824 +BFS time in seconds: 0.076350 nodes reachable from node 0: 120801 out of 120801 max BFS level: 201 method 6: same but check each result -BFS time in seconds: 0.090151 +BFS time in seconds: 0.071698 nodes reachable from node 0: 120801 out of 120801 max BFS level: 201 -random 10000 by 10000, nz: 199764, method 0 time 0.034 sec +random 10000 by 10000, nz: 
199764, method 0 time 0.048 sec method 5: vector assign and reduce: -BFS time in seconds: 0.004977 +BFS time in seconds: 0.003944 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 method 5: same but check each result -BFS time in seconds: 0.005054 +BFS time in seconds: 0.003572 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 method 6: apply unary operator -BFS time in seconds: 0.008472 +BFS time in seconds: 0.003559 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 method 6: same but check each result -BFS time in seconds: 0.004559 +BFS time in seconds: 0.005103 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 -random 10000 by 10000, nz: 199764, method 1 time 0.031 sec +random 10000 by 10000, nz: 199764, method 1 time 0.033 sec method 5: vector assign and reduce: -BFS time in seconds: 0.004798 +BFS time in seconds: 0.003542 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 method 5: same but check each result -BFS time in seconds: 0.004876 +BFS time in seconds: 0.003093 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 method 6: apply unary operator -BFS time in seconds: 0.008542 +BFS time in seconds: 0.002650 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 method 6: same but check each result -BFS time in seconds: 0.004613 +BFS time in seconds: 0.003812 nodes reachable from node 0: 10000 out of 10000 max BFS level: 5 -random 100000 by 100000, nz: 19980358, method 0 time 4.406 sec +random 100000 by 100000, nz: 19980358, method 0 time 5.015 sec method 5: vector assign and reduce: -BFS time in seconds: 0.264889 +BFS time in seconds: 0.138870 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 method 5: same but check each result -BFS time in seconds: 0.278009 +BFS time in seconds: 0.145375 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 method 6: apply unary operator -BFS time in seconds: 0.265488 +BFS time in seconds: 0.150154 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 method 6: same but check each result -BFS time in seconds: 0.277016 +BFS time in seconds: 0.142193 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 -random 100000 by 100000, nz: 19980358, method 1 time 3.869 sec +random 100000 by 100000, nz: 19980358, method 1 time 4.156 sec method 5: vector assign and reduce: -BFS time in seconds: 0.264430 +BFS time in seconds: 0.129175 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 method 5: same but check each result -BFS time in seconds: 0.273005 +BFS time in seconds: 0.153241 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 method 6: apply unary operator -BFS time in seconds: 0.269803 +BFS time in seconds: 0.143124 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 method 6: same but check each result -BFS time in seconds: 0.267878 +BFS time in seconds: 0.142926 nodes reachable from node 0: 100000 out of 100000 max BFS level: 4 diff --git a/GraphBLAS/Demo/Output/go3_out_laptop.txt b/GraphBLAS/Demo/Output/go3_out_laptop.txt new file mode 100644 index 0000000000..69b9f09c69 --- /dev/null +++ b/GraphBLAS/Demo/Output/go3_out_laptop.txt @@ -0,0 +1,190 @@ + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-CA/roadNet-CA_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 5533214 +nrows 1965207 ncols 1965207 +time to prune self-edges: 0.071788 
+time to build the graph with GrB_Matrix_build: 0.097648 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.169234 +matrix 1965207 by 1965207, 5533214 entries, from stdin + +total time to read A matrix: 2.587934 sec + +n 1965207 # edges 2766607 +U=triu(A) time: 0.061705 sec +read A, create U memory usage: 0.277805 GB +L=tril(A) time: 0.065431 sec +# triangles 120676 + +tricount time: 0.069911 sec (dot product method) +tri+prep time: 0.197047 sec (incl time to compute L and U) +compute C time: 0.067541 sec +reduce (C) time: 0.002370 sec +rate 14.04 million edges/sec (incl time for U=triu(A)) +rate 39.57 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.277805 GB +tricount time: 0.212219 sec (outer product method) +tri+prep time: 0.273924 sec (incl time to compute U) +compute C time: 0.206565 sec +reduce (C) time: 0.005654 sec +rate 10.10 million edges/sec (incl time for U=triu(A)) +rate 13.04 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.314854 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-PA/roadNet-PA_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 3083796 +nrows 1088093 ncols 1088093 +time to prune self-edges: 0.037004 +time to build the graph with GrB_Matrix_build: 0.054127 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.094337 +matrix 1088093 by 1088093, 3083796 entries, from stdin + +total time to read A matrix: 1.390888 sec + +n 1088093 # edges 1541898 +U=triu(A) time: 0.031687 sec +read A, create U memory usage: 0.154541 GB +L=tril(A) time: 0.039813 sec +# triangles 67150 + +tricount time: 0.038971 sec (dot product method) +tri+prep time: 0.110471 sec (incl time to compute L and U) +compute C time: 0.037140 sec +reduce (C) time: 0.001831 sec +rate 13.96 million edges/sec (incl time for U=triu(A)) +rate 39.57 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.154541 GB +tricount time: 0.120344 sec (outer product method) +tri+prep time: 0.152031 sec (incl time to compute U) +compute C time: 0.117223 sec +reduce (C) time: 0.003121 sec +rate 10.14 million edges/sec (incl time for U=triu(A)) +rate 12.81 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.174968 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-TX/roadNet-TX_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 3843320 +nrows 1379918 ncols 1379918 +time to prune self-edges: 0.038285 +time to build the graph with GrB_Matrix_build: 0.066937 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.116987 +matrix 1379918 by 1379918, 3843320 entries, from stdin + +total time to read A matrix: 1.729172 sec + +n 1379918 # edges 1921660 +U=triu(A) time: 0.043203 sec +read A, create U memory usage: 0.193557 GB +L=tril(A) time: 0.046873 sec +# triangles 82869 + +tricount time: 0.048759 sec (dot product method) +tri+prep time: 0.138835 sec (incl time to compute L and U) +compute C time: 0.046796 sec +reduce (C) time: 0.001963 sec +rate 13.84 million edges/sec (incl time for U=triu(A)) +rate 39.41 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.193557 GB +tricount time: 0.146706 sec (outer product method) +tri+prep time: 0.189909 sec (incl time to compute U) +compute C time: 0.142814 sec +reduce (C) time: 0.003892 sec +rate 10.12 million edges/sec (incl time for 
U=triu(A)) +rate 13.10 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.21841 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/DIMACS10/hugebubbles-00020_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63580358 +nrows 21198120 ncols 21198120 +time to prune self-edges: 0.419952 +time to build the graph with GrB_Matrix_build: 1.031380 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 5.350666 +matrix 21198120 by 21198120, 63580358 entries, from stdin + +total time to read A matrix: 33.598741 sec + +n 21198120 # edges 31790179 +U=triu(A) time: 0.591113 sec +read A, create U memory usage: 3.13682 GB +L=tril(A) time: 0.605525 sec +# triangles 0 + +tricount time: 1.993193 sec (dot product method) +tri+prep time: 3.189831 sec (incl time to compute L and U) +compute C time: 1.980140 sec +reduce (C) time: 0.013053 sec +rate 9.97 million edges/sec (incl time for U=triu(A)) +rate 15.95 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.13682 GB +tricount time: 5.877335 sec (outer product method) +tri+prep time: 6.468448 sec (incl time to compute U) +compute C time: 5.865297 sec +reduce (C) time: 0.012038 sec +rate 4.91 million edges/sec (incl time for U=triu(A)) +rate 5.41 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.39831 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Freescale/circuit5M_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 53967852 +nrows 5558327 ncols 5558327 +time to prune self-edges: 0.414280 +time to build the graph with GrB_Matrix_build: 0.759158 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 1.164744 +matrix 5558327 by 5558327, 53967852 entries, from stdin + +total time to read A matrix: 24.766022 sec + +n 5558327 # edges 26983926 +U=triu(A) time: 0.324305 sec +read A, create U memory usage: 2.16518 GB +L=tril(A) time: 0.335087 sec +# triangles 31019473 + +tricount time: 2.776518 sec (dot product method) +tri+prep time: 3.435910 sec (incl time to compute L and U) +compute C time: 2.628667 sec +reduce (C) time: 0.147851 sec +rate 7.85 million edges/sec (incl time for U=triu(A)) +rate 9.72 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 2.16518 GB +tricount time: 191.970076 sec (outer product method) +tri+prep time: 192.294381 sec (incl time to compute U) +compute C time: 191.822473 sec +reduce (C) time: 0.147603 sec +rate 0.14 million edges/sec (incl time for U=triu(A)) +rate 0.14 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 2.16518 GB + diff --git a/GraphBLAS/Demo/Output/go_out_cholesky.txt b/GraphBLAS/Demo/Output/go_out_cholesky.txt new file mode 100644 index 0000000000..609cc65c7b --- /dev/null +++ b/GraphBLAS/Demo/Output/go_out_cholesky.txt @@ -0,0 +1,2662 @@ +output of Demo/go on cholesky.cse.tamu.edu +IBM Power8, using 1 core, with xlc compiler, version 13.1.5 + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Mallya/lhr71_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 2985588 +nrows 70305 ncols 70305 +time to prune self-edges: 0.004912 +time to build the graph with GrB_Matrix_build: 0.023178 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.028733 +matrix 70305 by 70305, 2985588 entries, 
from stdin + +total time to read A matrix: 1.966791 sec + +n 70305 # edges 1492794 +U=triu(A) time: 0.011385 sec +read A, create U memory usage: 0.110294 GB +L=tril(A) time: 0.014782 sec +# triangles 160592 + +tricount time: 0.018637 sec (dot product method) +tri+prep time: 0.044804 sec (incl time to compute L and U) +compute C time: 0.017977 sec +reduce (C) time: 0.000660 sec +rate 33.32 million edges/sec (incl time for U=triu(A)) +rate 80.10 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.110294 GB +tricount time: 0.021585 sec (outer product method) +tri+prep time: 0.032970 sec (incl time to compute U) +compute C time: 0.020936 sec +reduce (C) time: 0.000649 sec +rate 45.28 million edges/sec (incl time for U=triu(A)) +rate 69.16 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.110294 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Freescale/Freescale2_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 11489868 +nrows 2999350 ncols 2999350 +time to prune self-edges: 0.017766 +time to build the graph with GrB_Matrix_build: 0.105430 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.156402 +matrix 2999350 by 2999350, 11489868 entries, from stdin + +total time to read A matrix: 6.505454 sec + +n 2999350 # edges 5744934 +U=triu(A) time: 0.056871 sec +read A, create U memory usage: 0.53361 GB +L=tril(A) time: 0.074847 sec +# triangles 21027280 + +tricount time: 0.313944 sec (dot product method) +tri+prep time: 0.445661 sec (incl time to compute L and U) +compute C time: 0.284084 sec +reduce (C) time: 0.029860 sec +rate 12.89 million edges/sec (incl time for U=triu(A)) +rate 18.30 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.53361 GB +tricount time: 0.201472 sec (outer product method) +tri+prep time: 0.258342 sec (incl time to compute U) +compute C time: 0.171790 sec +reduce (C) time: 0.029681 sec +rate 22.24 million edges/sec (incl time for U=triu(A)) +rate 28.51 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.53361 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/cit-HepPh/cit-HepPh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 841754 +nrows 34547 ncols 34547 +time to prune self-edges: 0.001080 +time to build the graph with GrB_Matrix_build: 0.006861 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.010932 +matrix 34547 by 34547, 841754 entries, from stdin + +total time to read A matrix: 0.465318 sec + +n 34547 # edges 420877 +U=triu(A) time: 0.003287 sec +read A, create U memory usage: 0.0316858 GB +L=tril(A) time: 0.004534 sec +# triangles 1276868 + +tricount time: 0.133234 sec (dot product method) +tri+prep time: 0.141056 sec (incl time to compute L and U) +compute C time: 0.129008 sec +reduce (C) time: 0.004227 sec +rate 2.98 million edges/sec (incl time for U=triu(A)) +rate 3.16 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0316859 GB +tricount time: 0.037250 sec (outer product method) +tri+prep time: 0.040537 sec (incl time to compute U) +compute C time: 0.033023 sec +reduce (C) time: 0.004227 sec +rate 10.38 million edges/sec (incl time for U=triu(A)) +rate 11.30 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0316858 GB + + 
+-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/cit-HepTh/cit-HepTh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 704570 +nrows 27771 ncols 27771 +time to prune self-edges: 0.000910 +time to build the graph with GrB_Matrix_build: 0.005627 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.008743 +matrix 27771 by 27771, 704570 entries, from stdin + +total time to read A matrix: 0.429792 sec + +n 27771 # edges 352285 +U=triu(A) time: 0.002827 sec +read A, create U memory usage: 0.0264761 GB +L=tril(A) time: 0.003742 sec +# triangles 1478735 + +tricount time: 0.136782 sec (dot product method) +tri+prep time: 0.143351 sec (incl time to compute L and U) +compute C time: 0.132928 sec +reduce (C) time: 0.003854 sec +rate 2.46 million edges/sec (incl time for U=triu(A)) +rate 2.58 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0264762 GB +tricount time: 0.042246 sec (outer product method) +tri+prep time: 0.045072 sec (incl time to compute U) +compute C time: 0.038396 sec +reduce (C) time: 0.003850 sec +rate 7.82 million edges/sec (incl time for U=triu(A)) +rate 8.34 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0264761 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/email-EuAll/email-EuAll_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 728962 +nrows 265215 ncols 265215 +time to prune self-edges: 0.001267 +time to build the graph with GrB_Matrix_build: 0.006907 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.011556 +matrix 265215 by 265215, 728962 entries, from stdin + +total time to read A matrix: 0.468555 sec + +n 265215 # edges 364481 +U=triu(A) time: 0.004136 sec +read A, create U memory usage: 0.036852 GB +L=tril(A) time: 0.005164 sec +# triangles 267313 + +tricount time: 0.093431 sec (dot product method) +tri+prep time: 0.102731 sec (incl time to compute L and U) +compute C time: 0.092522 sec +reduce (C) time: 0.000908 sec +rate 3.55 million edges/sec (incl time for U=triu(A)) +rate 3.90 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0368521 GB +tricount time: 0.032275 sec (outer product method) +tri+prep time: 0.036411 sec (incl time to compute U) +compute C time: 0.031368 sec +reduce (C) time: 0.000906 sec +rate 10.01 million edges/sec (incl time for U=triu(A)) +rate 11.29 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.036852 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/soc-Epinions1/soc-Epinions1_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 811480 +nrows 75880 ncols 75880 +time to prune self-edges: 0.001615 +time to build the graph with GrB_Matrix_build: 0.007102 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.010661 +matrix 75880 by 75880, 811480 entries, from stdin + +total time to read A matrix: 0.464673 sec + +n 75880 # edges 405740 +U=triu(A) time: 0.003518 sec +read A, create U memory usage: 0.0322492 GB +L=tril(A) time: 0.004663 sec +# triangles 1624481 + +tricount time: 0.293079 sec (dot product method) +tri+prep time: 0.301261 sec (incl time to compute L and U) +compute C time: 0.289839 sec +reduce (C) time: 0.003240 sec +rate 1.35 million edges/sec (incl time for U=triu(A)) +rate 1.38 million edges/sec 
(just tricount itself) + +tricount (dot) memory usage: 0.0322493 GB +tricount time: 0.056098 sec (outer product method) +tri+prep time: 0.059616 sec (incl time to compute U) +compute C time: 0.052863 sec +reduce (C) time: 0.003235 sec +rate 6.81 million edges/sec (incl time for U=triu(A)) +rate 7.23 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0322492 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/soc-Slashdot0811/soc-Slashdot0811_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 938360 +nrows 77361 ncols 77361 +time to prune self-edges: 0.001524 +time to build the graph with GrB_Matrix_build: 0.008116 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.012438 +matrix 77361 by 77361, 938360 entries, from stdin + +total time to read A matrix: 0.581054 sec + +n 77361 # edges 469180 +U=triu(A) time: 0.003919 sec +read A, create U memory usage: 0.0368761 GB +L=tril(A) time: 0.005377 sec +# triangles 551724 + +tricount time: 0.239633 sec (dot product method) +tri+prep time: 0.248929 sec (incl time to compute L and U) +compute C time: 0.237634 sec +reduce (C) time: 0.002000 sec +rate 1.88 million edges/sec (incl time for U=triu(A)) +rate 1.96 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0368762 GB +tricount time: 0.049112 sec (outer product method) +tri+prep time: 0.053030 sec (incl time to compute U) +compute C time: 0.047117 sec +reduce (C) time: 0.001995 sec +rate 8.85 million edges/sec (incl time for U=triu(A)) +rate 9.55 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0368761 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/soc-Slashdot0902/soc-Slashdot0902_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 1008460 +nrows 82169 ncols 82169 +time to prune self-edges: 0.001630 +time to build the graph with GrB_Matrix_build: 0.008767 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.013502 +matrix 82169 by 82169, 1008460 entries, from stdin + +total time to read A matrix: 0.586469 sec + +n 82169 # edges 504230 +U=triu(A) time: 0.004211 sec +read A, create U memory usage: 0.039592 GB +L=tril(A) time: 0.005792 sec +# triangles 602592 + +tricount time: 0.255823 sec (dot product method) +tri+prep time: 0.265827 sec (incl time to compute L and U) +compute C time: 0.253668 sec +reduce (C) time: 0.002155 sec +rate 1.90 million edges/sec (incl time for U=triu(A)) +rate 1.97 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0395922 GB +tricount time: 0.053394 sec (outer product method) +tri+prep time: 0.057606 sec (incl time to compute U) +compute C time: 0.051245 sec +reduce (C) time: 0.002150 sec +rate 8.75 million edges/sec (incl time for U=triu(A)) +rate 9.44 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.039592 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0312/amazon0312_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4699738 +nrows 400728 ncols 400728 +time to prune self-edges: 0.011596 +time to build the graph with GrB_Matrix_build: 0.041672 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.080759 +matrix 400728 by 400728, 4699738 entries, from stdin + +total time to read A matrix: 2.618798 sec 
+ +n 400728 # edges 2349869 +U=triu(A) time: 0.020193 sec +read A, create U memory usage: 0.18522 GB +L=tril(A) time: 0.027865 sec +# triangles 3686467 + +tricount time: 0.343501 sec (dot product method) +tri+prep time: 0.391560 sec (incl time to compute L and U) +compute C time: 0.323302 sec +reduce (C) time: 0.020199 sec +rate 6.00 million edges/sec (incl time for U=triu(A)) +rate 6.84 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.185221 GB +tricount time: 0.187840 sec (outer product method) +tri+prep time: 0.208034 sec (incl time to compute U) +compute C time: 0.167650 sec +reduce (C) time: 0.020191 sec +rate 11.30 million edges/sec (incl time for U=triu(A)) +rate 12.51 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.18522 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0505/amazon0505_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4878874 +nrows 410237 ncols 410237 +time to prune self-edges: 0.009134 +time to build the graph with GrB_Matrix_build: 0.041600 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.084357 +matrix 410237 by 410237, 4878874 entries, from stdin + +total time to read A matrix: 2.747226 sec + +n 410237 # edges 2439437 +U=triu(A) time: 0.020959 sec +read A, create U memory usage: 0.19205 GB +L=tril(A) time: 0.028857 sec +# triangles 3951063 + +tricount time: 0.363795 sec (dot product method) +tri+prep time: 0.413610 sec (incl time to compute L and U) +compute C time: 0.342556 sec +reduce (C) time: 0.021239 sec +rate 5.90 million edges/sec (incl time for U=triu(A)) +rate 6.71 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.19205 GB +tricount time: 0.202857 sec (outer product method) +tri+prep time: 0.223816 sec (incl time to compute U) +compute C time: 0.181619 sec +reduce (C) time: 0.021237 sec +rate 10.90 million edges/sec (incl time for U=triu(A)) +rate 12.03 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.19205 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0601/amazon0601_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4886816 +nrows 403395 ncols 403395 +time to prune self-edges: 0.009976 +time to build the graph with GrB_Matrix_build: 0.041995 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.082966 +matrix 403395 by 403395, 4886816 entries, from stdin + +total time to read A matrix: 2.760841 sec + +n 403395 # edges 2443408 +U=triu(A) time: 0.021047 sec +read A, create U memory usage: 0.192062 GB +L=tril(A) time: 0.028901 sec +# triangles 3986507 + +tricount time: 0.380689 sec (dot product method) +tri+prep time: 0.430637 sec (incl time to compute L and U) +compute C time: 0.359379 sec +reduce (C) time: 0.021309 sec +rate 5.67 million edges/sec (incl time for U=triu(A)) +rate 6.42 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.192062 GB +tricount time: 0.210242 sec (outer product method) +tri+prep time: 0.231289 sec (incl time to compute U) +compute C time: 0.188928 sec +reduce (C) time: 0.021315 sec +rate 10.56 million edges/sec (incl time for U=triu(A)) +rate 11.62 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.192062 GB + + +-------------------------------------------------------------------------------- 
+/research/davisgroup/GraphChallenge/snap/flickrEdges/flickrEdges_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4633896 +nrows 105939 ncols 105939 +time to prune self-edges: 0.010720 +time to build the graph with GrB_Matrix_build: 0.037170 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.067008 +matrix 105939 by 105939, 4633896 entries, from stdin + +total time to read A matrix: 2.408499 sec + +n 105939 # edges 2316948 +U=triu(A) time: 0.017995 sec +read A, create U memory usage: 0.171059 GB +L=tril(A) time: 0.023365 sec +# triangles 107987357 + +tricount time: 4.394314 sec (dot product method) +tri+prep time: 4.435674 sec (incl time to compute L and U) +compute C time: 4.368165 sec +reduce (C) time: 0.026150 sec +rate 0.52 million edges/sec (incl time for U=triu(A)) +rate 0.53 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.171059 GB +tricount time: 0.966344 sec (outer product method) +tri+prep time: 0.984340 sec (incl time to compute U) +compute C time: 0.940073 sec +reduce (C) time: 0.026271 sec +rate 2.35 million edges/sec (incl time for U=triu(A)) +rate 2.40 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.171059 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/cit-Patents/cit-Patents_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 33037894 +nrows 3774769 ncols 3774769 +time to prune self-edges: 0.050978 +time to build the graph with GrB_Matrix_build: 0.291000 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 1.521079 +matrix 3774769 by 3774769, 33037894 entries, from stdin + +total time to read A matrix: 20.278532 sec + +n 3774769 # edges 16518947 +U=triu(A) time: 0.144734 sec +read A, create U memory usage: 1.34036 GB +L=tril(A) time: 0.199077 sec +# triangles 7515023 + +tricount time: 3.630323 sec (dot product method) +tri+prep time: 3.974134 sec (incl time to compute L and U) +compute C time: 3.573438 sec +reduce (C) time: 0.056885 sec +rate 4.16 million edges/sec (incl time for U=triu(A)) +rate 4.55 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 1.34036 GB +tricount time: 2.274834 sec (outer product method) +tri+prep time: 2.419568 sec (incl time to compute U) +compute C time: 2.217888 sec +reduce (C) time: 0.056946 sec +rate 6.83 million edges/sec (incl time for U=triu(A)) +rate 7.26 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 1.34036 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/SNAP/soc-LiveJournal1_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 85702474 +nrows 4847572 ncols 4847572 +time to prune self-edges: 0.144778 +time to build the graph with GrB_Matrix_build: 0.698713 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 3.206422 +matrix 4847572 by 4847572, 85702474 entries, from stdin + +total time to read A matrix: 49.964708 sec + +n 4847572 # edges 42851237 +U=triu(A) time: 0.421525 sec +read A, create U memory usage: 3.27919 GB +L=tril(A) time: 0.499519 sec +# triangles 285730264 + +tricount time: 27.249647 sec (dot product method) +tri+prep time: 28.170691 sec (incl time to compute L and U) +compute C time: 26.874902 sec +reduce (C) time: 0.374745 sec +rate 1.52 million edges/sec (incl time for U=triu(A)) +rate 1.57 million edges/sec (just tricount itself) + +tricount (dot) memory 
usage: 3.27919 GB +tricount time: 9.607650 sec (outer product method) +tri+prep time: 10.029175 sec (incl time to compute U) +compute C time: 9.233189 sec +reduce (C) time: 0.374460 sec +rate 4.27 million edges/sec (incl time for U=triu(A)) +rate 4.46 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.27919 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Gleich/wb-edu_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 92472210 +nrows 9845726 ncols 9845726 +time to prune self-edges: 0.139663 +time to build the graph with GrB_Matrix_build: 0.777836 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 1.139699 +matrix 9845726 by 9845726, 92472210 entries, from stdin + +total time to read A matrix: 53.278709 sec + +n 9845726 # edges 46236105 +U=triu(A) time: 0.403363 sec +read A, create U memory usage: 3.72283 GB +L=tril(A) time: 0.526596 sec +# triangles 254718147 + +tricount time: 6.375060 sec (dot product method) +tri+prep time: 7.305020 sec (incl time to compute L and U) +compute C time: 5.947842 sec +reduce (C) time: 0.427218 sec +rate 6.33 million edges/sec (incl time for U=triu(A)) +rate 7.25 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.72283 GB +tricount time: 4.320846 sec (outer product method) +tri+prep time: 4.724210 sec (incl time to compute U) +compute C time: 3.894145 sec +reduce (C) time: 0.426702 sec +rate 9.79 million edges/sec (incl time for U=triu(A)) +rate 10.70 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.72283 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0302/amazon0302_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 1799584 +nrows 262112 ncols 262112 +time to prune self-edges: 0.007129 +time to build the graph with GrB_Matrix_build: 0.018676 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.026584 +matrix 262112 by 262112, 1799584 entries, from stdin + +total time to read A matrix: 1.025654 sec + +n 262112 # edges 899792 +U=triu(A) time: 0.008241 sec +read A, create U memory usage: 0.0752702 GB +L=tril(A) time: 0.011964 sec +# triangles 717719 + +tricount time: 0.065772 sec (dot product method) +tri+prep time: 0.085977 sec (incl time to compute L and U) +compute C time: 0.059646 sec +reduce (C) time: 0.006126 sec +rate 10.47 million edges/sec (incl time for U=triu(A)) +rate 13.68 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0752703 GB +tricount time: 0.044843 sec (outer product method) +tri+prep time: 0.053085 sec (incl time to compute U) +compute C time: 0.038727 sec +reduce (C) time: 0.006116 sec +rate 16.95 million edges/sec (incl time for U=triu(A)) +rate 20.07 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0752702 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/as-caida20071105/as-caida20071105_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 106762 +nrows 26476 ncols 26476 +time to prune self-edges: 0.000137 +time to build the graph with GrB_Matrix_build: 0.001120 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001429 +matrix 26476 by 26476, 106762 entries, from stdin + +total time to read A matrix: 0.077086 sec + +n 26476 # edges 53381 +U=triu(A) time: 
0.000548 sec +read A, create U memory usage: 0.0049032 GB +L=tril(A) time: 0.000740 sec +# triangles 36365 + +tricount time: 0.007779 sec (dot product method) +tri+prep time: 0.009067 sec (incl time to compute L and U) +compute C time: 0.007599 sec +reduce (C) time: 0.000180 sec +rate 5.89 million edges/sec (incl time for U=triu(A)) +rate 6.86 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00490331 GB +tricount time: 0.004854 sec (outer product method) +tri+prep time: 0.005402 sec (incl time to compute U) +compute C time: 0.004676 sec +reduce (C) time: 0.000178 sec +rate 9.88 million edges/sec (incl time for U=triu(A)) +rate 11.00 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0049032 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/as20000102/as20000102_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 25144 +nrows 6475 ncols 6475 +time to prune self-edges: 0.000033 +time to build the graph with GrB_Matrix_build: 0.000283 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000338 +matrix 6475 by 6475, 25144 entries, from stdin + +total time to read A matrix: 0.023140 sec + +n 6475 # edges 12572 +U=triu(A) time: 0.000134 sec +read A, create U memory usage: 0.00116491 GB +L=tril(A) time: 0.000177 sec +# triangles 6584 + +tricount time: 0.001009 sec (dot product method) +tri+prep time: 0.001319 sec (incl time to compute L and U) +compute C time: 0.000972 sec +reduce (C) time: 0.000037 sec +rate 9.53 million edges/sec (incl time for U=triu(A)) +rate 12.46 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00116503 GB +tricount time: 0.000484 sec (outer product method) +tri+prep time: 0.000618 sec (incl time to compute U) +compute C time: 0.000448 sec +reduce (C) time: 0.000036 sec +rate 20.35 million edges/sec (incl time for U=triu(A)) +rate 25.99 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00116491 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-AstroPh/ca-AstroPh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 396100 +nrows 18773 ncols 18773 +time to prune self-edges: 0.000558 +time to build the graph with GrB_Matrix_build: 0.003208 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.004871 +matrix 18773 by 18773, 396100 entries, from stdin + +total time to read A matrix: 0.225209 sec + +n 18773 # edges 198050 +U=triu(A) time: 0.001547 sec +read A, create U memory usage: 0.0150112 GB +L=tril(A) time: 0.002147 sec +# triangles 1351441 + +tricount time: 0.071591 sec (dot product method) +tri+prep time: 0.075285 sec (incl time to compute L and U) +compute C time: 0.069272 sec +reduce (C) time: 0.002319 sec +rate 2.63 million edges/sec (incl time for U=triu(A)) +rate 2.77 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0150114 GB +tricount time: 0.020007 sec (outer product method) +tri+prep time: 0.021554 sec (incl time to compute U) +compute C time: 0.017691 sec +reduce (C) time: 0.002316 sec +rate 9.19 million edges/sec (incl time for U=triu(A)) +rate 9.90 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0150112 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-CondMat/ca-CondMat_adj.tsv.gz 
+-------------------------------------------------------------- +ntuples: 186878 +nrows 23134 ncols 23134 +time to prune self-edges: 0.000239 +time to build the graph with GrB_Matrix_build: 0.001674 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.002393 +matrix 23134 by 23134, 186878 entries, from stdin + +total time to read A matrix: 0.127249 sec + +n 23134 # edges 93439 +U=triu(A) time: 0.000811 sec +read A, create U memory usage: 0.0076537 GB +L=tril(A) time: 0.001171 sec +# triangles 173361 + +tricount time: 0.011522 sec (dot product method) +tri+prep time: 0.013504 sec (incl time to compute L and U) +compute C time: 0.010670 sec +reduce (C) time: 0.000851 sec +rate 6.92 million edges/sec (incl time for U=triu(A)) +rate 8.11 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00765381 GB +tricount time: 0.005120 sec (outer product method) +tri+prep time: 0.005932 sec (incl time to compute U) +compute C time: 0.004270 sec +reduce (C) time: 0.000850 sec +rate 15.75 million edges/sec (incl time for U=triu(A)) +rate 18.25 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0076537 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-GrQc/ca-GrQc_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 28968 +nrows 5243 ncols 5243 +time to prune self-edges: 0.000038 +time to build the graph with GrB_Matrix_build: 0.000302 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000357 +matrix 5243 by 5243, 28968 entries, from stdin + +total time to read A matrix: 0.027812 sec + +n 5243 # edges 14484 +U=triu(A) time: 0.000147 sec +read A, create U memory usage: 0.0012533 GB +L=tril(A) time: 0.000205 sec +# triangles 48260 + +tricount time: 0.001174 sec (dot product method) +tri+prep time: 0.001525 sec (incl time to compute L and U) +compute C time: 0.001053 sec +reduce (C) time: 0.000121 sec +rate 9.49 million edges/sec (incl time for U=triu(A)) +rate 12.33 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00125341 GB +tricount time: 0.000659 sec (outer product method) +tri+prep time: 0.000806 sec (incl time to compute U) +compute C time: 0.000540 sec +reduce (C) time: 0.000120 sec +rate 17.97 million edges/sec (incl time for U=triu(A)) +rate 21.97 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0012533 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-HepPh/ca-HepPh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 236978 +nrows 12009 ncols 12009 +time to prune self-edges: 0.000302 +time to build the graph with GrB_Matrix_build: 0.001883 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.002640 +matrix 12009 by 12009, 236978 entries, from stdin + +total time to read A matrix: 0.141103 sec + +n 12009 # edges 118489 +U=triu(A) time: 0.000917 sec +read A, create U memory usage: 0.0090123 GB +L=tril(A) time: 0.001266 sec +# triangles 3358499 + +tricount time: 0.060435 sec (dot product method) +tri+prep time: 0.062619 sec (incl time to compute L and U) +compute C time: 0.059005 sec +reduce (C) time: 0.001430 sec +rate 1.89 million edges/sec (incl time for U=triu(A)) +rate 1.96 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00901241 GB +tricount time: 0.018904 sec (outer product method) +tri+prep time: 0.019821 sec (incl time to compute U) +compute 
C time: 0.017476 sec +reduce (C) time: 0.001428 sec +rate 5.98 million edges/sec (incl time for U=triu(A)) +rate 6.27 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0090123 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-HepTh/ca-HepTh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 51946 +nrows 9878 ncols 9878 +time to prune self-edges: 0.000067 +time to build the graph with GrB_Matrix_build: 0.000530 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000666 +matrix 9878 by 9878, 51946 entries, from stdin + +total time to read A matrix: 0.029618 sec + +n 9878 # edges 25973 +U=triu(A) time: 0.000254 sec +read A, create U memory usage: 0.00226591 GB +L=tril(A) time: 0.000367 sec +# triangles 28339 + +tricount time: 0.001994 sec (dot product method) +tri+prep time: 0.002615 sec (incl time to compute L and U) +compute C time: 0.001818 sec +reduce (C) time: 0.000175 sec +rate 9.93 million edges/sec (incl time for U=triu(A)) +rate 13.03 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00226602 GB +tricount time: 0.001087 sec (outer product method) +tri+prep time: 0.001341 sec (incl time to compute U) +compute C time: 0.000913 sec +reduce (C) time: 0.000174 sec +rate 19.36 million edges/sec (incl time for U=triu(A)) +rate 23.89 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00226591 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/email-Enron/email-Enron_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 367662 +nrows 36693 ncols 36693 +time to prune self-edges: 0.000657 +time to build the graph with GrB_Matrix_build: 0.003243 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.004390 +matrix 36693 by 36693, 367662 entries, from stdin + +total time to read A matrix: 0.217116 sec + +n 36693 # edges 183831 +U=triu(A) time: 0.001546 sec +read A, create U memory usage: 0.0147043 GB +L=tril(A) time: 0.002199 sec +# triangles 727044 + +tricount time: 0.069082 sec (dot product method) +tri+prep time: 0.072827 sec (incl time to compute L and U) +compute C time: 0.067235 sec +reduce (C) time: 0.001847 sec +rate 2.52 million edges/sec (incl time for U=triu(A)) +rate 2.66 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0147044 GB +tricount time: 0.020729 sec (outer product method) +tri+prep time: 0.022275 sec (incl time to compute U) +compute C time: 0.018886 sec +reduce (C) time: 0.001843 sec +rate 8.25 million edges/sec (incl time for U=triu(A)) +rate 8.87 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0147043 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/facebook_combined/facebook_combined_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 176468 +nrows 4040 ncols 4040 +time to prune self-edges: 0.000225 +time to build the graph with GrB_Matrix_build: 0.001338 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001591 +matrix 4040 by 4040, 176468 entries, from stdin + +total time to read A matrix: 0.110530 sec + +n 4040 # edges 88234 +U=triu(A) time: 0.000646 sec +read A, create U memory usage: 0.00651518 GB +L=tril(A) time: 0.000869 sec +# triangles 1612010 + +tricount time: 0.044190 sec (dot product method) 
+tri+prep time: 0.045705 sec (incl time to compute L and U) +compute C time: 0.043045 sec +reduce (C) time: 0.001145 sec +rate 1.93 million edges/sec (incl time for U=triu(A)) +rate 2.00 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00651529 GB +tricount time: 0.012053 sec (outer product method) +tri+prep time: 0.012698 sec (incl time to compute U) +compute C time: 0.010910 sec +reduce (C) time: 0.001143 sec +rate 6.95 million edges/sec (incl time for U=triu(A)) +rate 7.32 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00651518 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/loc-brightkite_edges/loc-brightkite_edges_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 428156 +nrows 58229 ncols 58229 +time to prune self-edges: 0.000621 +time to build the graph with GrB_Matrix_build: 0.003956 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.005477 +matrix 58229 by 58229, 428156 entries, from stdin + +total time to read A matrix: 0.240456 sec + +n 58229 # edges 214078 +U=triu(A) time: 0.001936 sec +read A, create U memory usage: 0.0177435 GB +L=tril(A) time: 0.002712 sec +# triangles 494728 + +tricount time: 0.047451 sec (dot product method) +tri+prep time: 0.052099 sec (incl time to compute L and U) +compute C time: 0.046145 sec +reduce (C) time: 0.001306 sec +rate 4.11 million edges/sec (incl time for U=triu(A)) +rate 4.51 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0177436 GB +tricount time: 0.013999 sec (outer product method) +tri+prep time: 0.015935 sec (incl time to compute U) +compute C time: 0.012696 sec +reduce (C) time: 0.001303 sec +rate 13.43 million edges/sec (incl time for U=triu(A)) +rate 15.29 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0177435 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/loc-gowalla_edges/loc-gowalla_edges_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 1900654 +nrows 196592 ncols 196592 +time to prune self-edges: 0.006710 +time to build the graph with GrB_Matrix_build: 0.018767 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.026429 +matrix 196592 by 196592, 1900654 entries, from stdin + +total time to read A matrix: 1.080389 sec + +n 196592 # edges 950327 +U=triu(A) time: 0.008131 sec +read A, create U memory usage: 0.076288 GB +L=tril(A) time: 0.011296 sec +# triangles 2273138 + +tricount time: 0.328888 sec (dot product method) +tri+prep time: 0.348315 sec (incl time to compute L and U) +compute C time: 0.321791 sec +reduce (C) time: 0.007097 sec +rate 2.73 million edges/sec (incl time for U=triu(A)) +rate 2.89 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0762881 GB +tricount time: 0.191280 sec (outer product method) +tri+prep time: 0.199411 sec (incl time to compute U) +compute C time: 0.184187 sec +reduce (C) time: 0.007093 sec +rate 4.77 million edges/sec (incl time for U=triu(A)) +rate 4.97 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.076288 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010331/oregon1_010331_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 44004 +nrows 10671 ncols 10671 +time to prune 
self-edges: 0.000057 +time to build the graph with GrB_Matrix_build: 0.000476 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000571 +matrix 10671 by 10671, 44004 entries, from stdin + +total time to read A matrix: 0.044254 sec + +n 10671 # edges 22002 +U=triu(A) time: 0.000227 sec +read A, create U memory usage: 0.00201171 GB +L=tril(A) time: 0.000302 sec +# triangles 17144 + +tricount time: 0.002367 sec (dot product method) +tri+prep time: 0.002895 sec (incl time to compute L and U) +compute C time: 0.002274 sec +reduce (C) time: 0.000093 sec +rate 7.60 million edges/sec (incl time for U=triu(A)) +rate 9.30 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00201183 GB +tricount time: 0.002095 sec (outer product method) +tri+prep time: 0.002322 sec (incl time to compute U) +compute C time: 0.002005 sec +reduce (C) time: 0.000090 sec +rate 9.48 million edges/sec (incl time for U=triu(A)) +rate 10.50 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00201171 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010407/oregon1_010407_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 43998 +nrows 10730 ncols 10730 +time to prune self-edges: 0.000058 +time to build the graph with GrB_Matrix_build: 0.000448 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000568 +matrix 10730 by 10730, 43998 entries, from stdin + +total time to read A matrix: 0.044867 sec + +n 10730 # edges 21999 +U=triu(A) time: 0.000227 sec +read A, create U memory usage: 0.00201386 GB +L=tril(A) time: 0.000303 sec +# triangles 15834 + +tricount time: 0.002348 sec (dot product method) +tri+prep time: 0.002877 sec (incl time to compute L and U) +compute C time: 0.002258 sec +reduce (C) time: 0.000090 sec +rate 7.65 million edges/sec (incl time for U=triu(A)) +rate 9.37 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00201397 GB +tricount time: 0.002084 sec (outer product method) +tri+prep time: 0.002310 sec (incl time to compute U) +compute C time: 0.001995 sec +reduce (C) time: 0.000089 sec +rate 9.52 million edges/sec (incl time for U=triu(A)) +rate 10.56 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00201386 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010414/oregon1_010414_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 44938 +nrows 10791 ncols 10791 +time to prune self-edges: 0.000059 +time to build the graph with GrB_Matrix_build: 0.000463 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000581 +matrix 10791 by 10791, 44938 entries, from stdin + +total time to read A matrix: 0.034002 sec + +n 10791 # edges 22469 +U=triu(A) time: 0.000226 sec +read A, create U memory usage: 0.00205014 GB +L=tril(A) time: 0.000314 sec +# triangles 18237 + +tricount time: 0.002463 sec (dot product method) +tri+prep time: 0.003003 sec (incl time to compute L and U) +compute C time: 0.002367 sec +reduce (C) time: 0.000096 sec +rate 7.48 million edges/sec (incl time for U=triu(A)) +rate 9.12 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00205025 GB +tricount time: 0.002233 sec (outer product method) +tri+prep time: 0.002458 sec (incl time to compute U) +compute C time: 0.002138 sec +reduce (C) time: 0.000095 sec +rate 9.14 million edges/sec (incl time 
for U=triu(A)) +rate 10.06 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00205014 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010421/oregon1_010421_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45494 +nrows 10860 ncols 10860 +time to prune self-edges: 0.000059 +time to build the graph with GrB_Matrix_build: 0.000479 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000587 +matrix 10860 by 10860, 45494 entries, from stdin + +total time to read A matrix: 0.045605 sec + +n 10860 # edges 22747 +U=triu(A) time: 0.000229 sec +read A, create U memory usage: 0.00207291 GB +L=tril(A) time: 0.000310 sec +# triangles 19108 + +tricount time: 0.002550 sec (dot product method) +tri+prep time: 0.003089 sec (incl time to compute L and U) +compute C time: 0.002452 sec +reduce (C) time: 0.000098 sec +rate 7.36 million edges/sec (incl time for U=triu(A)) +rate 8.92 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00207303 GB +tricount time: 0.002285 sec (outer product method) +tri+prep time: 0.002513 sec (incl time to compute U) +compute C time: 0.002188 sec +reduce (C) time: 0.000097 sec +rate 9.05 million edges/sec (incl time for U=triu(A)) +rate 9.96 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00207291 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010428/oregon1_010428_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 44986 +nrows 10887 ncols 10887 +time to prune self-edges: 0.000058 +time to build the graph with GrB_Matrix_build: 0.000524 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000584 +matrix 10887 by 10887, 44986 entries, from stdin + +total time to read A matrix: 0.046610 sec + +n 10887 # edges 22493 +U=triu(A) time: 0.000227 sec +read A, create U memory usage: 0.00205571 GB +L=tril(A) time: 0.000310 sec +# triangles 17645 + +tricount time: 0.002429 sec (dot product method) +tri+prep time: 0.002967 sec (incl time to compute L and U) +compute C time: 0.002335 sec +reduce (C) time: 0.000094 sec +rate 7.58 million edges/sec (incl time for U=triu(A)) +rate 9.26 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00205582 GB +tricount time: 0.002253 sec (outer product method) +tri+prep time: 0.002479 sec (incl time to compute U) +compute C time: 0.002160 sec +reduce (C) time: 0.000093 sec +rate 9.07 million edges/sec (incl time for U=triu(A)) +rate 9.99 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00205571 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010505/oregon1_010505_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45214 +nrows 10944 ncols 10944 +time to prune self-edges: 0.000059 +time to build the graph with GrB_Matrix_build: 0.000471 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000590 +matrix 10944 by 10944, 45214 entries, from stdin + +total time to read A matrix: 0.035653 sec + +n 10944 # edges 22607 +U=triu(A) time: 0.000229 sec +read A, create U memory usage: 0.00206619 GB +L=tril(A) time: 0.000310 sec +# triangles 17597 + +tricount time: 0.002428 sec (dot product method) +tri+prep time: 0.002967 sec (incl time to compute L and U) +compute C time: 
0.002334 sec +reduce (C) time: 0.000094 sec +rate 7.62 million edges/sec (incl time for U=triu(A)) +rate 9.31 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00206631 GB +tricount time: 0.002248 sec (outer product method) +tri+prep time: 0.002477 sec (incl time to compute U) +compute C time: 0.002156 sec +reduce (C) time: 0.000092 sec +rate 9.13 million edges/sec (incl time for U=triu(A)) +rate 10.06 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00206619 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010512/oregon1_010512_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45354 +nrows 11012 ncols 11012 +time to prune self-edges: 0.000059 +time to build the graph with GrB_Matrix_build: 0.000475 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000589 +matrix 11012 by 11012, 45354 entries, from stdin + +total time to read A matrix: 0.038034 sec + +n 11012 # edges 22677 +U=triu(A) time: 0.000230 sec +read A, create U memory usage: 0.00207395 GB +L=tril(A) time: 0.000309 sec +# triangles 17598 + +tricount time: 0.002458 sec (dot product method) +tri+prep time: 0.002997 sec (incl time to compute L and U) +compute C time: 0.002364 sec +reduce (C) time: 0.000094 sec +rate 7.57 million edges/sec (incl time for U=triu(A)) +rate 9.23 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00207406 GB +tricount time: 0.002265 sec (outer product method) +tri+prep time: 0.002495 sec (incl time to compute U) +compute C time: 0.002172 sec +reduce (C) time: 0.000093 sec +rate 9.09 million edges/sec (incl time for U=triu(A)) +rate 10.01 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00207395 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010519/oregon1_010519_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45448 +nrows 11052 ncols 11052 +time to prune self-edges: 0.000059 +time to build the graph with GrB_Matrix_build: 0.000496 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000589 +matrix 11052 by 11052, 45448 entries, from stdin + +total time to read A matrix: 0.051704 sec + +n 11052 # edges 22724 +U=triu(A) time: 0.000230 sec +read A, create U memory usage: 0.00207894 GB +L=tril(A) time: 0.000310 sec +# triangles 17677 + +tricount time: 0.002453 sec (dot product method) +tri+prep time: 0.002994 sec (incl time to compute L and U) +compute C time: 0.002359 sec +reduce (C) time: 0.000094 sec +rate 7.59 million edges/sec (incl time for U=triu(A)) +rate 9.26 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00207905 GB +tricount time: 0.002261 sec (outer product method) +tri+prep time: 0.002492 sec (incl time to compute U) +compute C time: 0.002169 sec +reduce (C) time: 0.000093 sec +rate 9.12 million edges/sec (incl time for U=triu(A)) +rate 10.05 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00207894 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010526/oregon1_010526_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 46818 +nrows 11175 ncols 11175 +time to prune self-edges: 0.000061 +time to build the graph with GrB_Matrix_build: 0.000480 +make symmetric +A = (C+C')/2 
+A = (C+C')/2 time 0.000603 +matrix 11175 by 11175, 46818 entries, from stdin + +total time to read A matrix: 0.048066 sec + +n 11175 # edges 23409 +U=triu(A) time: 0.000248 sec +read A, create U memory usage: 0.00213318 GB +L=tril(A) time: 0.000331 sec +# triangles 19894 + +tricount time: 0.002636 sec (dot product method) +tri+prep time: 0.003215 sec (incl time to compute L and U) +compute C time: 0.002535 sec +reduce (C) time: 0.000102 sec +rate 7.28 million edges/sec (incl time for U=triu(A)) +rate 8.88 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00213329 GB +tricount time: 0.002341 sec (outer product method) +tri+prep time: 0.002588 sec (incl time to compute U) +compute C time: 0.002240 sec +reduce (C) time: 0.000100 sec +rate 9.04 million edges/sec (incl time for U=triu(A)) +rate 10.00 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00213318 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010331/oregon2_010331_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 62360 +nrows 10901 ncols 10901 +time to prune self-edges: 0.000081 +time to build the graph with GrB_Matrix_build: 0.000598 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000739 +matrix 10901 by 10901, 62360 entries, from stdin + +total time to read A matrix: 0.041631 sec + +n 10901 # edges 31180 +U=triu(A) time: 0.000288 sec +read A, create U memory usage: 0.00268173 GB +L=tril(A) time: 0.000393 sec +# triangles 82856 + +tricount time: 0.005815 sec (dot product method) +tri+prep time: 0.006497 sec (incl time to compute L and U) +compute C time: 0.005604 sec +reduce (C) time: 0.000211 sec +rate 4.80 million edges/sec (incl time for U=triu(A)) +rate 5.36 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00268184 GB +tricount time: 0.003347 sec (outer product method) +tri+prep time: 0.003635 sec (incl time to compute U) +compute C time: 0.003136 sec +reduce (C) time: 0.000211 sec +rate 8.58 million edges/sec (incl time for U=triu(A)) +rate 9.32 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00268173 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010407/oregon2_010407_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 61710 +nrows 10982 ncols 10982 +time to prune self-edges: 0.000080 +time to build the graph with GrB_Matrix_build: 0.000606 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000735 +matrix 10982 by 10982, 61710 entries, from stdin + +total time to read A matrix: 0.050312 sec + +n 10982 # edges 30855 +U=triu(A) time: 0.000287 sec +read A, create U memory usage: 0.00266157 GB +L=tril(A) time: 0.000390 sec +# triangles 78138 + +tricount time: 0.005696 sec (dot product method) +tri+prep time: 0.006373 sec (incl time to compute L and U) +compute C time: 0.005492 sec +reduce (C) time: 0.000205 sec +rate 4.84 million edges/sec (incl time for U=triu(A)) +rate 5.42 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00266168 GB +tricount time: 0.003295 sec (outer product method) +tri+prep time: 0.003582 sec (incl time to compute U) +compute C time: 0.003092 sec +reduce (C) time: 0.000203 sec +rate 8.61 million edges/sec (incl time for U=triu(A)) +rate 9.36 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00266157 
GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010414/oregon2_010414_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63522 +nrows 11020 ncols 11020 +time to prune self-edges: 0.000082 +time to build the graph with GrB_Matrix_build: 0.000592 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000750 +matrix 11020 by 11020, 63522 entries, from stdin + +total time to read A matrix: 0.035710 sec + +n 11020 # edges 31761 +U=triu(A) time: 0.000292 sec +read A, create U memory usage: 0.00272832 GB +L=tril(A) time: 0.000443 sec +# triangles 88905 + +tricount time: 0.006154 sec (dot product method) +tri+prep time: 0.006889 sec (incl time to compute L and U) +compute C time: 0.005937 sec +reduce (C) time: 0.000217 sec +rate 4.61 million edges/sec (incl time for U=triu(A)) +rate 5.16 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00272843 GB +tricount time: 0.003480 sec (outer product method) +tri+prep time: 0.003772 sec (incl time to compute U) +compute C time: 0.003265 sec +reduce (C) time: 0.000215 sec +rate 8.42 million edges/sec (incl time for U=triu(A)) +rate 9.13 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00272832 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010421/oregon2_010421_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63076 +nrows 11081 ncols 11081 +time to prune self-edges: 0.000081 +time to build the graph with GrB_Matrix_build: 0.000633 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000747 +matrix 11081 by 11081, 63076 entries, from stdin + +total time to read A matrix: 0.039468 sec + +n 11081 # edges 31538 +U=triu(A) time: 0.000290 sec +read A, create U memory usage: 0.0027147 GB +L=tril(A) time: 0.000401 sec +# triangles 82129 + +tricount time: 0.005921 sec (dot product method) +tri+prep time: 0.006612 sec (incl time to compute L and U) +compute C time: 0.005710 sec +reduce (C) time: 0.000211 sec +rate 4.77 million edges/sec (incl time for U=triu(A)) +rate 5.33 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00271482 GB +tricount time: 0.003427 sec (outer product method) +tri+prep time: 0.003717 sec (incl time to compute U) +compute C time: 0.003217 sec +reduce (C) time: 0.000209 sec +rate 8.48 million edges/sec (incl time for U=triu(A)) +rate 9.20 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0027147 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010428/oregon2_010428_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 62868 +nrows 11114 ncols 11114 +time to prune self-edges: 0.000081 +time to build the graph with GrB_Matrix_build: 0.000605 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000747 +matrix 11114 by 11114, 62868 entries, from stdin + +total time to read A matrix: 0.054835 sec + +n 11114 # edges 31434 +U=triu(A) time: 0.000290 sec +read A, create U memory usage: 0.00270854 GB +L=tril(A) time: 0.000401 sec +# triangles 78000 + +tricount time: 0.005792 sec (dot product method) +tri+prep time: 0.006483 sec (incl time to compute L and U) +compute C time: 0.005584 sec +reduce (C) time: 0.000207 sec +rate 4.85 million edges/sec (incl time for U=triu(A)) +rate 5.43 million 
edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00270865 GB +tricount time: 0.003391 sec (outer product method) +tri+prep time: 0.003680 sec (incl time to compute U) +compute C time: 0.003185 sec +reduce (C) time: 0.000206 sec +rate 8.54 million edges/sec (incl time for U=triu(A)) +rate 9.27 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00270854 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010505/oregon2_010505_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 61886 +nrows 11158 ncols 11158 +time to prune self-edges: 0.000080 +time to build the graph with GrB_Matrix_build: 0.000611 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000739 +matrix 11158 by 11158, 61886 entries, from stdin + +total time to read A matrix: 0.050279 sec + +n 11158 # edges 30943 +U=triu(A) time: 0.000288 sec +read A, create U memory usage: 0.00267495 GB +L=tril(A) time: 0.000395 sec +# triangles 72182 + +tricount time: 0.005495 sec (dot product method) +tri+prep time: 0.006178 sec (incl time to compute L and U) +compute C time: 0.005296 sec +reduce (C) time: 0.000199 sec +rate 5.01 million edges/sec (incl time for U=triu(A)) +rate 5.63 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00267506 GB +tricount time: 0.003299 sec (outer product method) +tri+prep time: 0.003586 sec (incl time to compute U) +compute C time: 0.003101 sec +reduce (C) time: 0.000198 sec +rate 8.63 million edges/sec (incl time for U=triu(A)) +rate 9.38 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00267495 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010512/oregon2_010512_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 62606 +nrows 11261 ncols 11261 +time to prune self-edges: 0.000081 +time to build the graph with GrB_Matrix_build: 0.000661 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000747 +matrix 11261 by 11261, 62606 entries, from stdin + +total time to read A matrix: 0.049427 sec + +n 11261 # edges 31303 +U=triu(A) time: 0.000296 sec +read A, create U memory usage: 0.00270499 GB +L=tril(A) time: 0.000403 sec +# triangles 72866 + +tricount time: 0.005579 sec (dot product method) +tri+prep time: 0.006278 sec (incl time to compute L and U) +compute C time: 0.005377 sec +reduce (C) time: 0.000202 sec +rate 4.99 million edges/sec (incl time for U=triu(A)) +rate 5.61 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0027051 GB +tricount time: 0.003351 sec (outer product method) +tri+prep time: 0.003647 sec (incl time to compute U) +compute C time: 0.003151 sec +reduce (C) time: 0.000200 sec +rate 8.58 million edges/sec (incl time for U=triu(A)) +rate 9.34 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00270499 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010519/oregon2_010519_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 64574 +nrows 11376 ncols 11376 +time to prune self-edges: 0.000083 +time to build the graph with GrB_Matrix_build: 0.000683 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000765 +matrix 11376 by 11376, 64574 entries, from stdin + +total time to read A matrix: 0.055960 
sec + +n 11376 # edges 32287 +U=triu(A) time: 0.000299 sec +read A, create U memory usage: 0.00278043 GB +L=tril(A) time: 0.000422 sec +# triangles 83709 + +tricount time: 0.006076 sec (dot product method) +tri+prep time: 0.006797 sec (incl time to compute L and U) +compute C time: 0.005863 sec +reduce (C) time: 0.000213 sec +rate 4.75 million edges/sec (incl time for U=triu(A)) +rate 5.31 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00278055 GB +tricount time: 0.003546 sec (outer product method) +tri+prep time: 0.003845 sec (incl time to compute U) +compute C time: 0.003334 sec +reduce (C) time: 0.000212 sec +rate 8.40 million edges/sec (incl time for U=triu(A)) +rate 9.10 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00278043 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010526/oregon2_010526_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 65460 +nrows 11462 ncols 11462 +time to prune self-edges: 0.000085 +time to build the graph with GrB_Matrix_build: 0.000616 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000776 +matrix 11462 by 11462, 65460 entries, from stdin + +total time to read A matrix: 0.041085 sec + +n 11462 # edges 32730 +U=triu(A) time: 0.000303 sec +read A, create U memory usage: 0.00281577 GB +L=tril(A) time: 0.000417 sec +# triangles 89541 + +tricount time: 0.006302 sec (dot product method) +tri+prep time: 0.007022 sec (incl time to compute L and U) +compute C time: 0.006080 sec +reduce (C) time: 0.000221 sec +rate 4.66 million edges/sec (incl time for U=triu(A)) +rate 5.19 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00281588 GB +tricount time: 0.003644 sec (outer product method) +tri+prep time: 0.003947 sec (incl time to compute U) +compute C time: 0.003426 sec +reduce (C) time: 0.000218 sec +rate 8.29 million edges/sec (incl time for U=triu(A)) +rate 8.98 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00281577 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella04/p2p-Gnutella04_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 79988 +nrows 10877 ncols 10877 +time to prune self-edges: 0.000103 +time to build the graph with GrB_Matrix_build: 0.000758 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000928 +matrix 10877 by 10877, 79988 entries, from stdin + +total time to read A matrix: 0.067403 sec + +n 10877 # edges 39994 +U=triu(A) time: 0.000358 sec +read A, create U memory usage: 0.00331538 GB +L=tril(A) time: 0.000512 sec +# triangles 934 + +tricount time: 0.002746 sec (dot product method) +tri+prep time: 0.003617 sec (incl time to compute L and U) +compute C time: 0.002732 sec +reduce (C) time: 0.000014 sec +rate 11.06 million edges/sec (incl time for U=triu(A)) +rate 14.56 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00331549 GB +tricount time: 0.001027 sec (outer product method) +tri+prep time: 0.001385 sec (incl time to compute U) +compute C time: 0.001014 sec +reduce (C) time: 0.000013 sec +rate 28.88 million edges/sec (incl time for U=triu(A)) +rate 38.96 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00331538 GB + + +-------------------------------------------------------------------------------- 
+/research/davisgroup/GraphChallenge/snap/p2p-Gnutella05/p2p-Gnutella05_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63678 +nrows 8847 ncols 8847 +time to prune self-edges: 0.000082 +time to build the graph with GrB_Matrix_build: 0.000584 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000733 +matrix 8847 by 8847, 63678 entries, from stdin + +total time to read A matrix: 0.053969 sec + +n 8847 # edges 31839 +U=triu(A) time: 0.000286 sec +read A, create U memory usage: 0.00264702 GB +L=tril(A) time: 0.000411 sec +# triangles 1112 + +tricount time: 0.002177 sec (dot product method) +tri+prep time: 0.002873 sec (incl time to compute L and U) +compute C time: 0.002163 sec +reduce (C) time: 0.000014 sec +rate 11.08 million edges/sec (incl time for U=triu(A)) +rate 14.63 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00264713 GB +tricount time: 0.000821 sec (outer product method) +tri+prep time: 0.001107 sec (incl time to compute U) +compute C time: 0.000808 sec +reduce (C) time: 0.000013 sec +rate 28.75 million edges/sec (incl time for U=triu(A)) +rate 38.77 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00264702 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella06/p2p-Gnutella06_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63050 +nrows 8718 ncols 8718 +time to prune self-edges: 0.000081 +time to build the graph with GrB_Matrix_build: 0.000576 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000723 +matrix 8718 by 8718, 63050 entries, from stdin + +total time to read A matrix: 0.035301 sec + +n 8718 # edges 31525 +U=triu(A) time: 0.000283 sec +read A, create U memory usage: 0.00261925 GB +L=tril(A) time: 0.000403 sec +# triangles 1142 + +tricount time: 0.002135 sec (dot product method) +tri+prep time: 0.002821 sec (incl time to compute L and U) +compute C time: 0.002120 sec +reduce (C) time: 0.000015 sec +rate 11.17 million edges/sec (incl time for U=triu(A)) +rate 14.76 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00261936 GB +tricount time: 0.000806 sec (outer product method) +tri+prep time: 0.001090 sec (incl time to compute U) +compute C time: 0.000792 sec +reduce (C) time: 0.000014 sec +rate 28.93 million edges/sec (incl time for U=triu(A)) +rate 39.09 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00261925 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella08/p2p-Gnutella08_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 41554 +nrows 6302 ncols 6302 +time to prune self-edges: 0.000054 +time to build the graph with GrB_Matrix_build: 0.000402 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000483 +matrix 6302 by 6302, 41554 entries, from stdin + +total time to read A matrix: 0.026819 sec + +n 6302 # edges 20777 +U=triu(A) time: 0.000193 sec +read A, create U memory usage: 0.00174875 GB +L=tril(A) time: 0.000271 sec +# triangles 2383 + +tricount time: 0.001640 sec (dot product method) +tri+prep time: 0.002105 sec (incl time to compute L and U) +compute C time: 0.001617 sec +reduce (C) time: 0.000023 sec +rate 9.87 million edges/sec (incl time for U=triu(A)) +rate 12.67 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00174887 GB +tricount time: 
0.000589 sec (outer product method) +tri+prep time: 0.000782 sec (incl time to compute U) +compute C time: 0.000567 sec +reduce (C) time: 0.000022 sec +rate 26.57 million edges/sec (incl time for U=triu(A)) +rate 35.29 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00174875 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella09/p2p-Gnutella09_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 52026 +nrows 8115 ncols 8115 +time to prune self-edges: 0.000068 +time to build the graph with GrB_Matrix_build: 0.000501 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000615 +matrix 8115 by 8115, 52026 entries, from stdin + +total time to read A matrix: 0.040167 sec + +n 8115 # edges 26013 +U=triu(A) time: 0.000240 sec +read A, create U memory usage: 0.00219827 GB +L=tril(A) time: 0.000343 sec +# triangles 2354 + +tricount time: 0.001952 sec (dot product method) +tri+prep time: 0.002535 sec (incl time to compute L and U) +compute C time: 0.001930 sec +reduce (C) time: 0.000022 sec +rate 10.26 million edges/sec (incl time for U=triu(A)) +rate 13.33 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00219838 GB +tricount time: 0.000703 sec (outer product method) +tri+prep time: 0.000943 sec (incl time to compute U) +compute C time: 0.000683 sec +reduce (C) time: 0.000021 sec +rate 27.57 million edges/sec (incl time for U=triu(A)) +rate 36.98 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00219827 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella24/p2p-Gnutella24_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 130738 +nrows 26519 ncols 26519 +time to prune self-edges: 0.000168 +time to build the graph with GrB_Matrix_build: 0.001406 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001708 +matrix 26519 by 26519, 130738 entries, from stdin + +total time to read A matrix: 0.093807 sec + +n 26519 # edges 65369 +U=triu(A) time: 0.000633 sec +read A, create U memory usage: 0.00576806 GB +L=tril(A) time: 0.000914 sec +# triangles 986 + +tricount time: 0.003489 sec (dot product method) +tri+prep time: 0.005036 sec (incl time to compute L and U) +compute C time: 0.003474 sec +reduce (C) time: 0.000016 sec +rate 12.98 million edges/sec (incl time for U=triu(A)) +rate 18.73 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00576817 GB +tricount time: 0.001643 sec (outer product method) +tri+prep time: 0.002276 sec (incl time to compute U) +compute C time: 0.001629 sec +reduce (C) time: 0.000014 sec +rate 28.72 million edges/sec (incl time for U=triu(A)) +rate 39.79 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00576806 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella25/p2p-Gnutella25_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 109410 +nrows 22688 ncols 22688 +time to prune self-edges: 0.000140 +time to build the graph with GrB_Matrix_build: 0.001126 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001424 +matrix 22688 by 22688, 109410 entries, from stdin + +total time to read A matrix: 0.061393 sec + +n 22688 # edges 54705 +U=triu(A) time: 0.000541 sec +read A, create U memory usage: 
0.00484701 GB +L=tril(A) time: 0.000770 sec +# triangles 806 + +tricount time: 0.002741 sec (dot product method) +tri+prep time: 0.004052 sec (incl time to compute L and U) +compute C time: 0.002728 sec +reduce (C) time: 0.000013 sec +rate 13.50 million edges/sec (incl time for U=triu(A)) +rate 19.96 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00484712 GB +tricount time: 0.001337 sec (outer product method) +tri+prep time: 0.001879 sec (incl time to compute U) +compute C time: 0.001326 sec +reduce (C) time: 0.000012 sec +rate 29.12 million edges/sec (incl time for U=triu(A)) +rate 40.91 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00484701 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella30/p2p-Gnutella30_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 176656 +nrows 36683 ncols 36683 +time to prune self-edges: 0.000226 +time to build the graph with GrB_Matrix_build: 0.001812 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.002352 +matrix 36683 by 36683, 176656 entries, from stdin + +total time to read A matrix: 0.096834 sec + +n 36683 # edges 88328 +U=triu(A) time: 0.000846 sec +read A, create U memory usage: 0.00782767 GB +L=tril(A) time: 0.001244 sec +# triangles 1590 + +tricount time: 0.004678 sec (dot product method) +tri+prep time: 0.006768 sec (incl time to compute L and U) +compute C time: 0.004654 sec +reduce (C) time: 0.000023 sec +rate 13.05 million edges/sec (incl time for U=triu(A)) +rate 18.88 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00782778 GB +tricount time: 0.002190 sec (outer product method) +tri+prep time: 0.003036 sec (incl time to compute U) +compute C time: 0.002168 sec +reduce (C) time: 0.000021 sec +rate 29.10 million edges/sec (incl time for U=triu(A)) +rate 40.34 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00782767 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella31/p2p-Gnutella31_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 295784 +nrows 62587 ncols 62587 +time to prune self-edges: 0.000382 +time to build the graph with GrB_Matrix_build: 0.003085 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.004095 +matrix 62587 by 62587, 295784 entries, from stdin + +total time to read A matrix: 0.166779 sec + +n 62587 # edges 147892 +U=triu(A) time: 0.001449 sec +read A, create U memory usage: 0.0131524 GB +L=tril(A) time: 0.002111 sec +# triangles 2024 + +tricount time: 0.007769 sec (dot product method) +tri+prep time: 0.011329 sec (incl time to compute L and U) +compute C time: 0.007738 sec +reduce (C) time: 0.000031 sec +rate 13.05 million edges/sec (incl time for U=triu(A)) +rate 19.04 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0131525 GB +tricount time: 0.003687 sec (outer product method) +tri+prep time: 0.005135 sec (incl time to compute U) +compute C time: 0.003658 sec +reduce (C) time: 0.000028 sec +rate 28.80 million edges/sec (incl time for U=triu(A)) +rate 40.12 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0131524 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-CA/roadNet-CA_adj.tsv.gz 
+-------------------------------------------------------------- +ntuples: 5533214 +nrows 1965207 ncols 1965207 +time to prune self-edges: 0.009962 +time to build the graph with GrB_Matrix_build: 0.056365 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.088357 +matrix 1965207 by 1965207, 5533214 entries, from stdin + +total time to read A matrix: 3.220948 sec + +n 1965207 # edges 2766607 +U=triu(A) time: 0.031909 sec +read A, create U memory usage: 0.277805 GB +L=tril(A) time: 0.042631 sec +# triangles 120676 + +tricount time: 0.055080 sec (dot product method) +tri+prep time: 0.129620 sec (incl time to compute L and U) +compute C time: 0.053369 sec +reduce (C) time: 0.001712 sec +rate 21.34 million edges/sec (incl time for U=triu(A)) +rate 50.23 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.277805 GB +tricount time: 0.150015 sec (outer product method) +tri+prep time: 0.181924 sec (incl time to compute U) +compute C time: 0.148289 sec +reduce (C) time: 0.001726 sec +rate 15.21 million edges/sec (incl time for U=triu(A)) +rate 18.44 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.314854 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-PA/roadNet-PA_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 3083796 +nrows 1088093 ncols 1088093 +time to prune self-edges: 0.004848 +time to build the graph with GrB_Matrix_build: 0.031154 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.046554 +matrix 1088093 by 1088093, 3083796 entries, from stdin + +total time to read A matrix: 1.762782 sec + +n 1088093 # edges 1541898 +U=triu(A) time: 0.017328 sec +read A, create U memory usage: 0.154541 GB +L=tril(A) time: 0.023507 sec +# triangles 67150 + +tricount time: 0.030676 sec (dot product method) +tri+prep time: 0.071511 sec (incl time to compute L and U) +compute C time: 0.029717 sec +reduce (C) time: 0.000960 sec +rate 21.56 million edges/sec (incl time for U=triu(A)) +rate 50.26 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.154541 GB +tricount time: 0.082617 sec (outer product method) +tri+prep time: 0.099945 sec (incl time to compute U) +compute C time: 0.081654 sec +reduce (C) time: 0.000963 sec +rate 15.43 million edges/sec (incl time for U=triu(A)) +rate 18.66 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.174968 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-TX/roadNet-TX_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 3843320 +nrows 1379918 ncols 1379918 +time to prune self-edges: 0.006627 +time to build the graph with GrB_Matrix_build: 0.038702 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.059274 +matrix 1379918 by 1379918, 3843320 entries, from stdin + +total time to read A matrix: 2.241019 sec + +n 1379918 # edges 1921660 +U=triu(A) time: 0.021698 sec +read A, create U memory usage: 0.193557 GB +L=tril(A) time: 0.029135 sec +# triangles 82869 + +tricount time: 0.037180 sec (dot product method) +tri+prep time: 0.088014 sec (incl time to compute L and U) +compute C time: 0.036000 sec +reduce (C) time: 0.001181 sec +rate 21.83 million edges/sec (incl time for U=triu(A)) +rate 51.69 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.193557 GB +tricount time: 0.102047 sec (outer product method) +tri+prep 
time: 0.123746 sec (incl time to compute U) +compute C time: 0.100871 sec +reduce (C) time: 0.001176 sec +rate 15.53 million edges/sec (incl time for U=triu(A)) +rate 18.83 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.21841 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-1045506-262144_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 2091012 +nrows 262145 ncols 262145 +time to prune self-edges: 0.007431 +time to build the graph with GrB_Matrix_build: 0.020086 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.022261 +matrix 262145 by 262145, 2091012 entries, from stdin + +total time to read A matrix: 1.162887 sec + +n 262145 # edges 1045506 +U=triu(A) time: 0.009322 sec +read A, create U memory usage: 0.085763 GB +L=tril(A) time: 0.011626 sec +# triangles 1044484 + +tricount time: 0.029916 sec (dot product method) +tri+prep time: 0.050864 sec (incl time to compute L and U) +compute C time: 0.022426 sec +reduce (C) time: 0.007490 sec +rate 20.55 million edges/sec (incl time for U=triu(A)) +rate 34.95 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0857631 GB +tricount time: 0.027257 sec (outer product method) +tri+prep time: 0.036579 sec (incl time to compute U) +compute C time: 0.019768 sec +reduce (C) time: 0.007489 sec +rate 28.58 million edges/sec (incl time for U=triu(A)) +rate 38.36 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.085763 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-16764930-4194304_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 33529860 +nrows 4194305 ncols 4194305 +time to prune self-edges: 0.051537 +time to build the graph with GrB_Matrix_build: 0.280340 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.427067 +matrix 4194305 by 4194305, 33529860 entries, from stdin + +total time to read A matrix: 19.117987 sec + +n 4194305 # edges 16764930 +U=triu(A) time: 0.156538 sec +read A, create U memory usage: 1.37485 GB +L=tril(A) time: 0.182126 sec +# triangles 16760836 + +tricount time: 0.490339 sec (dot product method) +tri+prep time: 0.829003 sec (incl time to compute L and U) +compute C time: 0.368927 sec +reduce (C) time: 0.121412 sec +rate 20.22 million edges/sec (incl time for U=triu(A)) +rate 34.19 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 1.37485 GB +tricount time: 0.442260 sec (outer product method) +tri+prep time: 0.598798 sec (incl time to compute U) +compute C time: 0.320995 sec +reduce (C) time: 0.121265 sec +rate 28.00 million edges/sec (incl time for U=triu(A)) +rate 37.91 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 1.37485 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-260610-65536_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 521220 +nrows 65537 ncols 65537 +time to prune self-edges: 0.000681 +time to build the graph with GrB_Matrix_build: 0.004507 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.005456 +matrix 65537 by 65537, 521220 entries, from stdin + +total time to read A matrix: 0.295282 sec + +n 65537 # edges 260610 +U=triu(A) time: 0.002278 sec +read A, create U memory usage: 0.0213861 
GB +L=tril(A) time: 0.002852 sec +# triangles 260100 + +tricount time: 0.007487 sec (dot product method) +tri+prep time: 0.012617 sec (incl time to compute L and U) +compute C time: 0.005617 sec +reduce (C) time: 0.001870 sec +rate 20.66 million edges/sec (incl time for U=triu(A)) +rate 34.81 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0213862 GB +tricount time: 0.006768 sec (outer product method) +tri+prep time: 0.009046 sec (incl time to compute U) +compute C time: 0.004902 sec +reduce (C) time: 0.001866 sec +rate 28.81 million edges/sec (incl time for U=triu(A)) +rate 38.50 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0213861 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-268386306-67108864_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 536772612 +nrows 67108865 ncols 67108865 +time to prune self-edges: 0.958522 +time to build the graph with GrB_Matrix_build: 4.500179 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 5.797496 +matrix 67108865 by 67108865, 536772612 entries, from stdin + +total time to read A matrix: 321.100216 sec + +n 67108865 # edges 268386306 +U=triu(A) time: 2.557677 sec +read A, create U memory usage: 22.0082 GB +L=tril(A) time: 2.939150 sec +# triangles 268369924 + +tricount time: 7.856853 sec (dot product method) +tri+prep time: 13.353680 sec (incl time to compute L and U) +compute C time: 5.905209 sec +reduce (C) time: 1.951644 sec +rate 20.10 million edges/sec (incl time for U=triu(A)) +rate 34.16 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 22.0082 GB +tricount time: 7.096396 sec (outer product method) +tri+prep time: 9.654073 sec (incl time to compute U) +compute C time: 5.155217 sec +reduce (C) time: 1.941179 sec +rate 27.80 million edges/sec (incl time for U=triu(A)) +rate 37.82 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 22.0082 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-4188162-1048576_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 8376324 +nrows 1048577 ncols 1048577 +time to prune self-edges: 0.017390 +time to build the graph with GrB_Matrix_build: 0.072306 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.089823 +matrix 1048577 by 1048577, 8376324 entries, from stdin + +total time to read A matrix: 4.611660 sec + +n 1048577 # edges 4188162 +U=triu(A) time: 0.038214 sec +read A, create U memory usage: 0.343491 GB +L=tril(A) time: 0.045611 sec +# triangles 4186116 + +tricount time: 0.121902 sec (dot product method) +tri+prep time: 0.205728 sec (incl time to compute L and U) +compute C time: 0.091515 sec +reduce (C) time: 0.030387 sec +rate 20.36 million edges/sec (incl time for U=triu(A)) +rate 34.36 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.343492 GB +tricount time: 0.110532 sec (outer product method) +tri+prep time: 0.148746 sec (incl time to compute U) +compute C time: 0.080340 sec +reduce (C) time: 0.030192 sec +rate 28.16 million edges/sec (incl time for U=triu(A)) +rate 37.89 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.343491 GB + + +-------------------------------------------------------------------------------- 
+/research/davisgroup/GraphChallenge/ssget/DIMACS10/hugebubbles-00020_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63580358 +nrows 21198120 ncols 21198120 +time to prune self-edges: 0.102726 +time to build the graph with GrB_Matrix_build: 0.496943 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 2.765700 +matrix 21198120 by 21198120, 63580358 entries, from stdin + +total time to read A matrix: 39.948353 sec + +n 21198120 # edges 31790179 +U=triu(A) time: 0.338591 sec +read A, create U memory usage: 3.13682 GB +L=tril(A) time: 0.397308 sec +# triangles 0 + +tricount time: 2.346817 sec (dot product method) +tri+prep time: 3.082716 sec (incl time to compute L and U) +compute C time: 2.344962 sec +reduce (C) time: 0.001855 sec +rate 10.31 million edges/sec (incl time for U=triu(A)) +rate 13.55 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.13682 GB +tricount time: 6.286390 sec (outer product method) +tri+prep time: 6.624981 sec (incl time to compute U) +compute C time: 6.284822 sec +reduce (C) time: 0.001568 sec +rate 4.80 million edges/sec (incl time for U=triu(A)) +rate 5.06 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.39831 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/vanHeukelum/cage15_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 94044692 +nrows 5154860 ncols 5154860 +time to prune self-edges: 0.158475 +time to build the graph with GrB_Matrix_build: 0.755073 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 1.191720 +matrix 5154860 by 5154860, 94044692 entries, from stdin + +total time to read A matrix: 54.063371 sec + +n 5154860 # edges 47022346 +U=triu(A) time: 0.387604 sec +read A, create U memory usage: 3.5918 GB +L=tril(A) time: 0.494457 sec +# triangles 36106416 + +tricount time: 2.881892 sec (dot product method) +tri+prep time: 3.763953 sec (incl time to compute L and U) +compute C time: 2.540954 sec +reduce (C) time: 0.340939 sec +rate 12.49 million edges/sec (incl time for U=triu(A)) +rate 16.32 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.5918 GB +tricount time: 1.906832 sec (outer product method) +tri+prep time: 2.294436 sec (incl time to compute U) +compute C time: 1.564882 sec +reduce (C) time: 0.341951 sec +rate 20.49 million edges/sec (incl time for U=triu(A)) +rate 24.66 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.5918 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Freescale/circuit5M_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 53967852 +nrows 5558327 ncols 5558327 +time to prune self-edges: 0.079256 +time to build the graph with GrB_Matrix_build: 0.412237 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.632489 +matrix 5558327 by 5558327, 53967852 entries, from stdin + +total time to read A matrix: 30.264331 sec + +n 5558327 # edges 26983926 +U=triu(A) time: 0.237427 sec +read A, create U memory usage: 2.16518 GB +L=tril(A) time: 0.264510 sec +# triangles 31019473 + +tricount time: 1.953011 sec (dot product method) +tri+prep time: 2.454949 sec (incl time to compute L and U) +compute C time: 1.654011 sec +reduce (C) time: 0.299000 sec +rate 10.99 million edges/sec (incl time for U=triu(A)) +rate 13.82 million edges/sec (just tricount itself) + +tricount (dot) memory 
usage: 2.16518 GB +tricount time: 451.458944 sec (outer product method) +tri+prep time: 451.696371 sec (incl time to compute U) +compute C time: 451.154225 sec +reduce (C) time: 0.304719 sec +rate 0.06 million edges/sec (incl time for U=triu(A)) +rate 0.06 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 2.16518 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale18-ef16/graph500-scale18-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 7600696 +nrows 174148 ncols 174148 +time to prune self-edges: 0.037345 +time to build the graph with GrB_Matrix_build: 0.064544 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.117777 +matrix 174148 by 174148, 7600696 entries, from stdin + +total time to read A matrix: 5.779402 sec + +n 174148 # edges 3800348 +U=triu(A) time: 0.029689 sec +read A, create U memory usage: 0.280592 GB +L=tril(A) time: 0.038261 sec +# triangles 82287285 + +tricount time: 15.911034 sec (dot product method) +tri+prep time: 15.978984 sec (incl time to compute L and U) +compute C time: 15.870555 sec +reduce (C) time: 0.040479 sec +rate 0.24 million edges/sec (incl time for U=triu(A)) +rate 0.24 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.280592 GB +tricount time: 3.183447 sec (outer product method) +tri+prep time: 3.213136 sec (incl time to compute U) +compute C time: 3.143298 sec +reduce (C) time: 0.040149 sec +rate 1.18 million edges/sec (incl time for U=triu(A)) +rate 1.19 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.280592 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale19-ef16/graph500-scale19-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 15459350 +nrows 335319 ncols 335319 +time to prune self-edges: 0.025127 +time to build the graph with GrB_Matrix_build: 0.117764 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.305453 +matrix 335319 by 335319, 15459350 entries, from stdin + +total time to read A matrix: 8.430525 sec + +n 335319 # edges 7729675 +U=triu(A) time: 0.060590 sec +read A, create U memory usage: 0.56995 GB +L=tril(A) time: 0.077361 sec +# triangles 186288972 + +tricount time: 43.541612 sec (dot product method) +tri+prep time: 43.679563 sec (incl time to compute L and U) +compute C time: 43.459520 sec +reduce (C) time: 0.082092 sec +rate 0.18 million edges/sec (incl time for U=triu(A)) +rate 0.18 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.56995 GB +tricount time: 8.058348 sec (outer product method) +tri+prep time: 8.118937 sec (incl time to compute U) +compute C time: 7.977839 sec +reduce (C) time: 0.080509 sec +rate 0.95 million edges/sec (incl time for U=triu(A)) +rate 0.96 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.56995 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale20-ef16/graph500-scale20-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 31361722 +nrows 645821 ncols 645821 +time to prune self-edges: 0.054969 +time to build the graph with GrB_Matrix_build: 0.238656 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 1.035722 +matrix 645821 by 645821, 31361722 entries, from stdin + +total 
time to read A matrix: 17.553108 sec + +n 645821 # edges 15680861 +U=triu(A) time: 0.123237 sec +read A, create U memory usage: 1.15486 GB +L=tril(A) time: 0.156359 sec +# triangles 419349784 + +tricount time: 111.600069 sec (dot product method) +tri+prep time: 111.879665 sec (incl time to compute L and U) +compute C time: 111.435616 sec +reduce (C) time: 0.164453 sec +rate 0.14 million edges/sec (incl time for U=triu(A)) +rate 0.14 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 1.15486 GB +tricount time: 23.314847 sec (outer product method) +tri+prep time: 23.438084 sec (incl time to compute U) +compute C time: 23.150806 sec +reduce (C) time: 0.164041 sec +rate 0.67 million edges/sec (incl time for U=triu(A)) +rate 0.67 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 1.15486 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale21-ef16/graph500-scale21-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63463300 +nrows 1243073 ncols 1243073 +time to prune self-edges: 0.093413 +time to build the graph with GrB_Matrix_build: 0.477578 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 3.235574 +matrix 1243073 by 1243073, 63463300 entries, from stdin + +total time to read A matrix: 37.212293 sec + +n 1243073 # edges 31731650 +U=triu(A) time: 0.249255 sec +read A, create U memory usage: 2.3344 GB +L=tril(A) time: 0.315442 sec +# triangles 935100883 + +tricount time: 290.295431 sec (dot product method) +tri+prep time: 290.860128 sec (incl time to compute L and U) +compute C time: 289.966433 sec +reduce (C) time: 0.328999 sec +rate 0.11 million edges/sec (incl time for U=triu(A)) +rate 0.11 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 2.3344 GB +tricount time: 72.300361 sec (outer product method) +tri+prep time: 72.549616 sec (incl time to compute U) +compute C time: 71.971883 sec +reduce (C) time: 0.328478 sec +rate 0.44 million edges/sec (incl time for U=triu(A)) +rate 0.44 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 2.3344 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale22-ef16/graph500-scale22-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 128194008 +nrows 2393286 ncols 2393286 +time to prune self-edges: 0.186319 +time to build the graph with GrB_Matrix_build: 0.961453 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 9.492602 +matrix 2393286 by 2393286, 128194008 entries, from stdin + +total time to read A matrix: 80.337484 sec + +n 2393286 # edges 64097004 +U=triu(A) time: 0.498915 sec +read A, create U memory usage: 4.71072 GB +L=tril(A) time: 0.635984 sec +# triangles 2067392370 + +tricount time: 778.703326 sec (dot product method) +tri+prep time: 779.838225 sec (incl time to compute L and U) +compute C time: 778.050580 sec +reduce (C) time: 0.652746 sec +rate 0.08 million edges/sec (incl time for U=triu(A)) +rate 0.08 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 4.71072 GB +tricount time: 188.108980 sec (outer product method) +tri+prep time: 188.607896 sec (incl time to compute U) +compute C time: 187.461724 sec +reduce (C) time: 0.647257 sec +rate 0.34 million edges/sec (incl time for U=triu(A)) +rate 0.34 million edges/sec (just tricount itself) + +tricount (outer) memory 
usage: 4.71072 GB + diff --git a/GraphBLAS/Demo/Output/go_out_laptop.txt b/GraphBLAS/Demo/Output/go_out_laptop.txt new file mode 100644 index 0000000000..704d3e1ae7 --- /dev/null +++ b/GraphBLAS/Demo/Output/go_out_laptop.txt @@ -0,0 +1,2660 @@ + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Mallya/lhr71_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 2985588 +nrows 70305 ncols 70305 +time to prune self-edges: 0.036498 +time to build the graph with GrB_Matrix_build: 0.041482 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.049858 +matrix 70305 by 70305, 2985588 entries, from stdin + +total time to read A matrix: 1.206487 sec + +n 70305 # edges 1492794 +U=triu(A) time: 0.015589 sec +read A, create U memory usage: 0.110294 GB +L=tril(A) time: 0.017856 sec +# triangles 160592 + +tricount time: 0.024183 sec (dot product method) +tri+prep time: 0.057628 sec (incl time to compute L and U) +compute C time: 0.023846 sec +reduce (C) time: 0.000337 sec +rate 25.90 million edges/sec (incl time for U=triu(A)) +rate 61.73 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.110294 GB +tricount time: 0.014046 sec (outer product method) +tri+prep time: 0.029635 sec (incl time to compute U) +compute C time: 0.013490 sec +reduce (C) time: 0.000556 sec +rate 50.37 million edges/sec (incl time for U=triu(A)) +rate 106.28 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.110294 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Freescale/Freescale2_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 11489868 +nrows 2999350 ncols 2999350 +time to prune self-edges: 0.147825 +time to build the graph with GrB_Matrix_build: 0.181471 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.311780 +matrix 2999350 by 2999350, 11489868 entries, from stdin + +total time to read A matrix: 5.249749 sec + +n 2999350 # edges 5744934 +U=triu(A) time: 0.087506 sec +read A, create U memory usage: 0.53361 GB +L=tril(A) time: 0.109751 sec +# triangles 21027280 + +tricount time: 0.303843 sec (dot product method) +tri+prep time: 0.501100 sec (incl time to compute L and U) +compute C time: 0.283759 sec +reduce (C) time: 0.020084 sec +rate 11.46 million edges/sec (incl time for U=triu(A)) +rate 18.91 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.53361 GB +tricount time: 0.188248 sec (outer product method) +tri+prep time: 0.275754 sec (incl time to compute U) +compute C time: 0.168175 sec +reduce (C) time: 0.020073 sec +rate 20.83 million edges/sec (incl time for U=triu(A)) +rate 30.52 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.53361 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/cit-HepPh/cit-HepPh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 841754 +nrows 34547 ncols 34547 +time to prune self-edges: 0.011195 +time to build the graph with GrB_Matrix_build: 0.011980 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.026698 +matrix 34547 by 34547, 841754 entries, from stdin + +total time to read A matrix: 0.360870 sec + +n 34547 # edges 420877 +U=triu(A) time: 0.004859 sec +read A, create U memory usage: 0.0316858 GB +L=tril(A) time: 0.005081 sec +# triangles 1276868 + +tricount 
time: 0.170150 sec (dot product method) +tri+prep time: 0.180090 sec (incl time to compute L and U) +compute C time: 0.168247 sec +reduce (C) time: 0.001903 sec +rate 2.34 million edges/sec (incl time for U=triu(A)) +rate 2.47 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0316859 GB +tricount time: 0.044133 sec (outer product method) +tri+prep time: 0.048992 sec (incl time to compute U) +compute C time: 0.042320 sec +reduce (C) time: 0.001813 sec +rate 8.59 million edges/sec (incl time for U=triu(A)) +rate 9.54 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0316858 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/cit-HepTh/cit-HepTh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 704570 +nrows 27771 ncols 27771 +time to prune self-edges: 0.008799 +time to build the graph with GrB_Matrix_build: 0.009596 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.020322 +matrix 27771 by 27771, 704570 entries, from stdin + +total time to read A matrix: 0.301584 sec + +n 27771 # edges 352285 +U=triu(A) time: 0.004049 sec +read A, create U memory usage: 0.0264761 GB +L=tril(A) time: 0.004224 sec +# triangles 1478735 + +tricount time: 0.163439 sec (dot product method) +tri+prep time: 0.171712 sec (incl time to compute L and U) +compute C time: 0.161695 sec +reduce (C) time: 0.001744 sec +rate 2.05 million edges/sec (incl time for U=triu(A)) +rate 2.16 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0264762 GB +tricount time: 0.042384 sec (outer product method) +tri+prep time: 0.046433 sec (incl time to compute U) +compute C time: 0.040670 sec +reduce (C) time: 0.001714 sec +rate 7.59 million edges/sec (incl time for U=triu(A)) +rate 8.31 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0264761 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/email-EuAll/email-EuAll_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 728962 +nrows 265215 ncols 265215 +time to prune self-edges: 0.009026 +time to build the graph with GrB_Matrix_build: 0.011758 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.033395 +matrix 265215 by 265215, 728962 entries, from stdin + +total time to read A matrix: 0.330759 sec + +n 265215 # edges 364481 +U=triu(A) time: 0.005805 sec +read A, create U memory usage: 0.036852 GB +L=tril(A) time: 0.006603 sec +# triangles 267313 + +tricount time: 0.121052 sec (dot product method) +tri+prep time: 0.133460 sec (incl time to compute L and U) +compute C time: 0.120631 sec +reduce (C) time: 0.000421 sec +rate 2.73 million edges/sec (incl time for U=triu(A)) +rate 3.01 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0368521 GB +tricount time: 0.029472 sec (outer product method) +tri+prep time: 0.035277 sec (incl time to compute U) +compute C time: 0.029084 sec +reduce (C) time: 0.000388 sec +rate 10.33 million edges/sec (incl time for U=triu(A)) +rate 12.37 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.036852 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/soc-Epinions1/soc-Epinions1_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 811480 +nrows 75880 ncols 75880 +time to prune 
self-edges: 0.011025 +time to build the graph with GrB_Matrix_build: 0.012489 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.024259 +matrix 75880 by 75880, 811480 entries, from stdin + +total time to read A matrix: 0.350488 sec + +n 75880 # edges 405740 +U=triu(A) time: 0.005670 sec +read A, create U memory usage: 0.0322492 GB +L=tril(A) time: 0.005481 sec +# triangles 1624481 + +tricount time: 0.365275 sec (dot product method) +tri+prep time: 0.376426 sec (incl time to compute L and U) +compute C time: 0.363900 sec +reduce (C) time: 0.001375 sec +rate 1.08 million edges/sec (incl time for U=triu(A)) +rate 1.11 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0322493 GB +tricount time: 0.061808 sec (outer product method) +tri+prep time: 0.067478 sec (incl time to compute U) +compute C time: 0.060445 sec +reduce (C) time: 0.001363 sec +rate 6.01 million edges/sec (incl time for U=triu(A)) +rate 6.56 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0322492 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/soc-Slashdot0811/soc-Slashdot0811_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 938360 +nrows 77361 ncols 77361 +time to prune self-edges: 0.009338 +time to build the graph with GrB_Matrix_build: 0.013627 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.030415 +matrix 77361 by 77361, 938360 entries, from stdin + +total time to read A matrix: 0.403450 sec + +n 77361 # edges 469180 +U=triu(A) time: 0.005481 sec +read A, create U memory usage: 0.0368761 GB +L=tril(A) time: 0.006220 sec +# triangles 551724 + +tricount time: 0.306412 sec (dot product method) +tri+prep time: 0.318113 sec (incl time to compute L and U) +compute C time: 0.305540 sec +reduce (C) time: 0.000872 sec +rate 1.47 million edges/sec (incl time for U=triu(A)) +rate 1.53 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0368762 GB +tricount time: 0.046443 sec (outer product method) +tri+prep time: 0.051924 sec (incl time to compute U) +compute C time: 0.045543 sec +reduce (C) time: 0.000900 sec +rate 9.04 million edges/sec (incl time for U=triu(A)) +rate 10.10 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0368761 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/soc-Slashdot0902/soc-Slashdot0902_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 1008460 +nrows 82169 ncols 82169 +time to prune self-edges: 0.009512 +time to build the graph with GrB_Matrix_build: 0.014537 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.033015 +matrix 82169 by 82169, 1008460 entries, from stdin + +total time to read A matrix: 0.434311 sec + +n 82169 # edges 504230 +U=triu(A) time: 0.005979 sec +read A, create U memory usage: 0.039592 GB +L=tril(A) time: 0.006323 sec +# triangles 602592 + +tricount time: 0.326855 sec (dot product method) +tri+prep time: 0.339157 sec (incl time to compute L and U) +compute C time: 0.325854 sec +reduce (C) time: 0.001001 sec +rate 1.49 million edges/sec (incl time for U=triu(A)) +rate 1.54 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0395922 GB +tricount time: 0.052578 sec (outer product method) +tri+prep time: 0.058557 sec (incl time to compute U) +compute C time: 0.051124 sec +reduce (C) time: 0.001454 sec +rate 8.61 million edges/sec 
(incl time for U=triu(A)) +rate 9.59 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.039592 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0312/amazon0312_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4699738 +nrows 400728 ncols 400728 +time to prune self-edges: 0.072794 +time to build the graph with GrB_Matrix_build: 0.068435 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.264225 +matrix 400728 by 400728, 4699738 entries, from stdin + +total time to read A matrix: 2.215676 sec + +n 400728 # edges 2349869 +U=triu(A) time: 0.032870 sec +read A, create U memory usage: 0.18522 GB +L=tril(A) time: 0.038956 sec +# triangles 3686467 + +tricount time: 0.441918 sec (dot product method) +tri+prep time: 0.513744 sec (incl time to compute L and U) +compute C time: 0.431539 sec +reduce (C) time: 0.010379 sec +rate 4.57 million edges/sec (incl time for U=triu(A)) +rate 5.32 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.185221 GB +tricount time: 0.273077 sec (outer product method) +tri+prep time: 0.305947 sec (incl time to compute U) +compute C time: 0.261258 sec +reduce (C) time: 0.011819 sec +rate 7.68 million edges/sec (incl time for U=triu(A)) +rate 8.61 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.18522 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0505/amazon0505_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4878874 +nrows 410237 ncols 410237 +time to prune self-edges: 0.076347 +time to build the graph with GrB_Matrix_build: 0.073038 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.271347 +matrix 410237 by 410237, 4878874 entries, from stdin + +total time to read A matrix: 2.315225 sec + +n 410237 # edges 2439437 +U=triu(A) time: 0.033339 sec +read A, create U memory usage: 0.19205 GB +L=tril(A) time: 0.038188 sec +# triangles 3951063 + +tricount time: 0.473278 sec (dot product method) +tri+prep time: 0.544805 sec (incl time to compute L and U) +compute C time: 0.462338 sec +reduce (C) time: 0.010940 sec +rate 4.48 million edges/sec (incl time for U=triu(A)) +rate 5.15 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.19205 GB +tricount time: 0.263919 sec (outer product method) +tri+prep time: 0.297258 sec (incl time to compute U) +compute C time: 0.252660 sec +reduce (C) time: 0.011259 sec +rate 8.21 million edges/sec (incl time for U=triu(A)) +rate 9.24 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.19205 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0601/amazon0601_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4886816 +nrows 403395 ncols 403395 +time to prune self-edges: 0.072861 +time to build the graph with GrB_Matrix_build: 0.074150 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.280947 +matrix 403395 by 403395, 4886816 entries, from stdin + +total time to read A matrix: 2.230342 sec + +n 403395 # edges 2443408 +U=triu(A) time: 0.032487 sec +read A, create U memory usage: 0.192062 GB +L=tril(A) time: 0.037376 sec +# triangles 3986507 + +tricount time: 0.492950 sec (dot product method) +tri+prep time: 0.562813 sec (incl time to compute L and U) +compute C time: 
0.481698 sec +reduce (C) time: 0.011252 sec +rate 4.34 million edges/sec (incl time for U=triu(A)) +rate 4.96 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.192062 GB +tricount time: 0.263060 sec (outer product method) +tri+prep time: 0.295547 sec (incl time to compute U) +compute C time: 0.249376 sec +reduce (C) time: 0.013684 sec +rate 8.27 million edges/sec (incl time for U=triu(A)) +rate 9.29 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.192062 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/flickrEdges/flickrEdges_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 4633896 +nrows 105939 ncols 105939 +time to prune self-edges: 0.072251 +time to build the graph with GrB_Matrix_build: 0.065099 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.223663 +matrix 105939 by 105939, 4633896 entries, from stdin + +total time to read A matrix: 2.060772 sec + +n 105939 # edges 2316948 +U=triu(A) time: 0.026300 sec +read A, create U memory usage: 0.171059 GB +L=tril(A) time: 0.024799 sec +# triangles 107987357 + +tricount time: 5.996290 sec (dot product method) +tri+prep time: 6.047389 sec (incl time to compute L and U) +compute C time: 5.981808 sec +reduce (C) time: 0.014482 sec +rate 0.38 million edges/sec (incl time for U=triu(A)) +rate 0.39 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.171059 GB +tricount time: 1.216013 sec (outer product method) +tri+prep time: 1.242313 sec (incl time to compute U) +compute C time: 1.200246 sec +reduce (C) time: 0.015767 sec +rate 1.87 million edges/sec (incl time for U=triu(A)) +rate 1.91 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.171059 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/cit-Patents/cit-Patents_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 33037894 +nrows 3774769 ncols 3774769 +time to prune self-edges: 0.253220 +time to build the graph with GrB_Matrix_build: 0.504631 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 4.848224 +matrix 3774769 by 3774769, 33037894 entries, from stdin + +total time to read A matrix: 19.125177 sec + +n 3774769 # edges 16518947 +U=triu(A) time: 0.227128 sec +read A, create U memory usage: 1.34036 GB +L=tril(A) time: 0.268458 sec +# triangles 7515023 + +tricount time: 3.920048 sec (dot product method) +tri+prep time: 4.415634 sec (incl time to compute L and U) +compute C time: 3.889580 sec +reduce (C) time: 0.030468 sec +rate 3.74 million edges/sec (incl time for U=triu(A)) +rate 4.21 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 1.34036 GB +tricount time: 2.073183 sec (outer product method) +tri+prep time: 2.300311 sec (incl time to compute U) +compute C time: 2.038365 sec +reduce (C) time: 0.034818 sec +rate 7.18 million edges/sec (incl time for U=triu(A)) +rate 7.97 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 1.34036 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/SNAP/soc-LiveJournal1_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 85702474 +nrows 4847572 ncols 4847572 +time to prune self-edges: 0.587371 +time to build the graph with GrB_Matrix_build: 1.432374 +make symmetric +A = 
(C+C')/2 +A = (C+C')/2 time 7.592359 +matrix 4847572 by 4847572, 85702474 entries, from stdin + +total time to read A matrix: 42.656188 sec + +n 4847572 # edges 42851237 +U=triu(A) time: 0.574254 sec +read A, create U memory usage: 3.27919 GB +L=tril(A) time: 0.605192 sec +# triangles 285730264 + +tricount time: 38.587100 sec (dot product method) +tri+prep time: 39.766546 sec (incl time to compute L and U) +compute C time: 38.403422 sec +reduce (C) time: 0.183678 sec +rate 1.08 million edges/sec (incl time for U=triu(A)) +rate 1.11 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.27919 GB +tricount time: 9.548842 sec (outer product method) +tri+prep time: 10.123096 sec (incl time to compute U) +compute C time: 9.335616 sec +reduce (C) time: 0.213226 sec +rate 4.23 million edges/sec (incl time for U=triu(A)) +rate 4.49 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.27919 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Gleich/wb-edu_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 92472210 +nrows 9845726 ncols 9845726 +time to prune self-edges: 0.576111 +time to build the graph with GrB_Matrix_build: 1.507754 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 2.186182 +matrix 9845726 by 9845726, 92472210 entries, from stdin + +total time to read A matrix: 40.350069 sec + +n 9845726 # edges 46236105 +U=triu(A) time: 0.631903 sec +read A, create U memory usage: 3.72283 GB +L=tril(A) time: 0.716852 sec +# triangles 254718147 + +tricount time: 6.667195 sec (dot product method) +tri+prep time: 8.015950 sec (incl time to compute L and U) +compute C time: 6.452076 sec +reduce (C) time: 0.215119 sec +rate 5.77 million edges/sec (incl time for U=triu(A)) +rate 6.93 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.72283 GB +tricount time: 2.973400 sec (outer product method) +tri+prep time: 3.605303 sec (incl time to compute U) +compute C time: 2.746121 sec +reduce (C) time: 0.227279 sec +rate 12.82 million edges/sec (incl time for U=triu(A)) +rate 15.55 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.72283 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/amazon0302/amazon0302_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 1799584 +nrows 262112 ncols 262112 +time to prune self-edges: 0.018967 +time to build the graph with GrB_Matrix_build: 0.028322 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.080842 +matrix 262112 by 262112, 1799584 entries, from stdin + +total time to read A matrix: 0.826558 sec + +n 262112 # edges 899792 +U=triu(A) time: 0.012514 sec +read A, create U memory usage: 0.0752702 GB +L=tril(A) time: 0.015437 sec +# triangles 717719 + +tricount time: 0.102216 sec (dot product method) +tri+prep time: 0.130167 sec (incl time to compute L and U) +compute C time: 0.099557 sec +reduce (C) time: 0.002659 sec +rate 6.91 million edges/sec (incl time for U=triu(A)) +rate 8.80 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0752703 GB +tricount time: 0.067387 sec (outer product method) +tri+prep time: 0.079901 sec (incl time to compute U) +compute C time: 0.063178 sec +reduce (C) time: 0.004209 sec +rate 11.26 million edges/sec (incl time for U=triu(A)) +rate 13.35 million edges/sec (just tricount itself) + +tricount (outer) 
memory usage: 0.0752702 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/as-caida20071105/as-caida20071105_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 106762 +nrows 26476 ncols 26476 +time to prune self-edges: 0.001402 +time to build the graph with GrB_Matrix_build: 0.003603 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.005788 +matrix 26476 by 26476, 106762 entries, from stdin + +total time to read A matrix: 0.051959 sec + +n 26476 # edges 53381 +U=triu(A) time: 0.000724 sec +read A, create U memory usage: 0.0049032 GB +L=tril(A) time: 0.000933 sec +# triangles 36365 + +tricount time: 0.007960 sec (dot product method) +tri+prep time: 0.009617 sec (incl time to compute L and U) +compute C time: 0.007879 sec +reduce (C) time: 0.000081 sec +rate 5.55 million edges/sec (incl time for U=triu(A)) +rate 6.71 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00490331 GB +tricount time: 0.003318 sec (outer product method) +tri+prep time: 0.004042 sec (incl time to compute U) +compute C time: 0.003230 sec +reduce (C) time: 0.000088 sec +rate 13.21 million edges/sec (incl time for U=triu(A)) +rate 16.09 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0049032 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/as20000102/as20000102_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 25144 +nrows 6475 ncols 6475 +time to prune self-edges: 0.000212 +time to build the graph with GrB_Matrix_build: 0.000435 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000511 +matrix 6475 by 6475, 25144 entries, from stdin + +total time to read A matrix: 0.010186 sec + +n 6475 # edges 12572 +U=triu(A) time: 0.000266 sec +read A, create U memory usage: 0.00116491 GB +L=tril(A) time: 0.000162 sec +# triangles 6584 + +tricount time: 0.000943 sec (dot product method) +tri+prep time: 0.001371 sec (incl time to compute L and U) +compute C time: 0.000923 sec +reduce (C) time: 0.000020 sec +rate 9.17 million edges/sec (incl time for U=triu(A)) +rate 13.33 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00116503 GB +tricount time: 0.000431 sec (outer product method) +tri+prep time: 0.000697 sec (incl time to compute U) +compute C time: 0.000415 sec +reduce (C) time: 0.000016 sec +rate 18.04 million edges/sec (incl time for U=triu(A)) +rate 29.17 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00116491 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-AstroPh/ca-AstroPh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 396100 +nrows 18773 ncols 18773 +time to prune self-edges: 0.004596 +time to build the graph with GrB_Matrix_build: 0.005205 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.010628 +matrix 18773 by 18773, 396100 entries, from stdin + +total time to read A matrix: 0.162908 sec + +n 18773 # edges 198050 +U=triu(A) time: 0.001974 sec +read A, create U memory usage: 0.0150112 GB +L=tril(A) time: 0.002136 sec +# triangles 1351441 + +tricount time: 0.092058 sec (dot product method) +tri+prep time: 0.096168 sec (incl time to compute L and U) +compute C time: 0.091004 sec +reduce (C) time: 0.001054 sec +rate 2.06 million edges/sec (incl time for U=triu(A)) 
+rate 2.15 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0150114 GB +tricount time: 0.021874 sec (outer product method) +tri+prep time: 0.023848 sec (incl time to compute U) +compute C time: 0.020912 sec +reduce (C) time: 0.000962 sec +rate 8.30 million edges/sec (incl time for U=triu(A)) +rate 9.05 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0150112 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-CondMat/ca-CondMat_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 186878 +nrows 23134 ncols 23134 +time to prune self-edges: 0.002190 +time to build the graph with GrB_Matrix_build: 0.002779 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.006062 +matrix 23134 by 23134, 186878 entries, from stdin + +total time to read A matrix: 0.084222 sec + +n 23134 # edges 93439 +U=triu(A) time: 0.001204 sec +read A, create U memory usage: 0.0076537 GB +L=tril(A) time: 0.001294 sec +# triangles 173361 + +tricount time: 0.014387 sec (dot product method) +tri+prep time: 0.016885 sec (incl time to compute L and U) +compute C time: 0.013950 sec +reduce (C) time: 0.000437 sec +rate 5.53 million edges/sec (incl time for U=triu(A)) +rate 6.49 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00765381 GB +tricount time: 0.005947 sec (outer product method) +tri+prep time: 0.007151 sec (incl time to compute U) +compute C time: 0.005586 sec +reduce (C) time: 0.000361 sec +rate 13.07 million edges/sec (incl time for U=triu(A)) +rate 15.71 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0076537 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-GrQc/ca-GrQc_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 28968 +nrows 5243 ncols 5243 +time to prune self-edges: 0.000317 +time to build the graph with GrB_Matrix_build: 0.000477 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000718 +matrix 5243 by 5243, 28968 entries, from stdin + +total time to read A matrix: 0.012950 sec + +n 5243 # edges 14484 +U=triu(A) time: 0.000152 sec +read A, create U memory usage: 0.0012533 GB +L=tril(A) time: 0.000204 sec +# triangles 48260 + +tricount time: 0.001085 sec (dot product method) +tri+prep time: 0.001441 sec (incl time to compute L and U) +compute C time: 0.001027 sec +reduce (C) time: 0.000058 sec +rate 10.05 million edges/sec (incl time for U=triu(A)) +rate 13.35 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00125341 GB +tricount time: 0.000817 sec (outer product method) +tri+prep time: 0.000969 sec (incl time to compute U) +compute C time: 0.000730 sec +reduce (C) time: 0.000087 sec +rate 14.95 million edges/sec (incl time for U=triu(A)) +rate 17.73 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0012533 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-HepPh/ca-HepPh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 236978 +nrows 12009 ncols 12009 +time to prune self-edges: 0.002274 +time to build the graph with GrB_Matrix_build: 0.003188 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.005386 +matrix 12009 by 12009, 236978 entries, from stdin + +total time to read A matrix: 0.102559 sec + +n 12009 # 
edges 118489 +U=triu(A) time: 0.001151 sec +read A, create U memory usage: 0.0090123 GB +L=tril(A) time: 0.001129 sec +# triangles 3358499 + +tricount time: 0.073416 sec (dot product method) +tri+prep time: 0.075696 sec (incl time to compute L and U) +compute C time: 0.072546 sec +reduce (C) time: 0.000870 sec +rate 1.57 million edges/sec (incl time for U=triu(A)) +rate 1.61 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00901241 GB +tricount time: 0.020552 sec (outer product method) +tri+prep time: 0.021703 sec (incl time to compute U) +compute C time: 0.019882 sec +reduce (C) time: 0.000670 sec +rate 5.46 million edges/sec (incl time for U=triu(A)) +rate 5.77 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0090123 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/ca-HepTh/ca-HepTh_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 51946 +nrows 9878 ncols 9878 +time to prune self-edges: 0.000450 +time to build the graph with GrB_Matrix_build: 0.000784 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001298 +matrix 9878 by 9878, 51946 entries, from stdin + +total time to read A matrix: 0.022577 sec + +n 9878 # edges 25973 +U=triu(A) time: 0.000523 sec +read A, create U memory usage: 0.00226591 GB +L=tril(A) time: 0.000562 sec +# triangles 28339 + +tricount time: 0.003701 sec (dot product method) +tri+prep time: 0.004786 sec (incl time to compute L and U) +compute C time: 0.003573 sec +reduce (C) time: 0.000128 sec +rate 5.43 million edges/sec (incl time for U=triu(A)) +rate 7.02 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00226602 GB +tricount time: 0.002143 sec (outer product method) +tri+prep time: 0.002666 sec (incl time to compute U) +compute C time: 0.002005 sec +reduce (C) time: 0.000138 sec +rate 9.74 million edges/sec (incl time for U=triu(A)) +rate 12.12 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00226591 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/email-Enron/email-Enron_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 367662 +nrows 36693 ncols 36693 +time to prune self-edges: 0.004648 +time to build the graph with GrB_Matrix_build: 0.006115 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.009052 +matrix 36693 by 36693, 367662 entries, from stdin + +total time to read A matrix: 0.156779 sec + +n 36693 # edges 183831 +U=triu(A) time: 0.002106 sec +read A, create U memory usage: 0.0147043 GB +L=tril(A) time: 0.002429 sec +# triangles 727044 + +tricount time: 0.088789 sec (dot product method) +tri+prep time: 0.093324 sec (incl time to compute L and U) +compute C time: 0.087905 sec +reduce (C) time: 0.000884 sec +rate 1.97 million edges/sec (incl time for U=triu(A)) +rate 2.07 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0147044 GB +tricount time: 0.020004 sec (outer product method) +tri+prep time: 0.022110 sec (incl time to compute U) +compute C time: 0.018845 sec +reduce (C) time: 0.001159 sec +rate 8.31 million edges/sec (incl time for U=triu(A)) +rate 9.19 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0147043 GB + + +-------------------------------------------------------------------------------- 
+/research/davisgroup/GraphChallenge/snap/facebook_combined/facebook_combined_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 176468 +nrows 4040 ncols 4040 +time to prune self-edges: 0.002092 +time to build the graph with GrB_Matrix_build: 0.002211 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.002935 +matrix 4040 by 4040, 176468 entries, from stdin + +total time to read A matrix: 0.072332 sec + +n 4040 # edges 88234 +U=triu(A) time: 0.000878 sec +read A, create U memory usage: 0.00651518 GB +L=tril(A) time: 0.000828 sec +# triangles 1612010 + +tricount time: 0.050205 sec (dot product method) +tri+prep time: 0.051911 sec (incl time to compute L and U) +compute C time: 0.049199 sec +reduce (C) time: 0.001006 sec +rate 1.70 million edges/sec (incl time for U=triu(A)) +rate 1.76 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00651529 GB +tricount time: 0.014819 sec (outer product method) +tri+prep time: 0.015697 sec (incl time to compute U) +compute C time: 0.014336 sec +reduce (C) time: 0.000483 sec +rate 5.62 million edges/sec (incl time for U=triu(A)) +rate 5.95 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00651518 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/loc-brightkite_edges/loc-brightkite_edges_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 428156 +nrows 58229 ncols 58229 +time to prune self-edges: 0.005867 +time to build the graph with GrB_Matrix_build: 0.006823 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.011917 +matrix 58229 by 58229, 428156 entries, from stdin + +total time to read A matrix: 0.188092 sec + +n 58229 # edges 214078 +U=triu(A) time: 0.002675 sec +read A, create U memory usage: 0.0177435 GB +L=tril(A) time: 0.004437 sec +# triangles 494728 + +tricount time: 0.055257 sec (dot product method) +tri+prep time: 0.062369 sec (incl time to compute L and U) +compute C time: 0.054656 sec +reduce (C) time: 0.000601 sec +rate 3.43 million edges/sec (incl time for U=triu(A)) +rate 3.87 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0177436 GB +tricount time: 0.020579 sec (outer product method) +tri+prep time: 0.023254 sec (incl time to compute U) +compute C time: 0.019970 sec +reduce (C) time: 0.000609 sec +rate 9.21 million edges/sec (incl time for U=triu(A)) +rate 10.40 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0177435 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/loc-gowalla_edges/loc-gowalla_edges_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 1900654 +nrows 196592 ncols 196592 +time to prune self-edges: 0.021076 +time to build the graph with GrB_Matrix_build: 0.028424 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.071521 +matrix 196592 by 196592, 1900654 entries, from stdin + +total time to read A matrix: 0.849136 sec + +n 196592 # edges 950327 +U=triu(A) time: 0.012400 sec +read A, create U memory usage: 0.076288 GB +L=tril(A) time: 0.014071 sec +# triangles 2273138 + +tricount time: 0.414667 sec (dot product method) +tri+prep time: 0.441138 sec (incl time to compute L and U) +compute C time: 0.411617 sec +reduce (C) time: 0.003050 sec +rate 2.15 million edges/sec (incl time for U=triu(A)) +rate 2.29 million edges/sec (just tricount itself) + +tricount (dot) 
memory usage: 0.0762881 GB +tricount time: 0.163989 sec (outer product method) +tri+prep time: 0.176389 sec (incl time to compute U) +compute C time: 0.159668 sec +reduce (C) time: 0.004321 sec +rate 5.39 million edges/sec (incl time for U=triu(A)) +rate 5.80 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.076288 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010331/oregon1_010331_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 44004 +nrows 10671 ncols 10671 +time to prune self-edges: 0.000409 +time to build the graph with GrB_Matrix_build: 0.000616 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000953 +matrix 10671 by 10671, 44004 entries, from stdin + +total time to read A matrix: 0.017607 sec + +n 10671 # edges 22002 +U=triu(A) time: 0.000330 sec +read A, create U memory usage: 0.00201171 GB +L=tril(A) time: 0.000387 sec +# triangles 17144 + +tricount time: 0.002490 sec (dot product method) +tri+prep time: 0.003207 sec (incl time to compute L and U) +compute C time: 0.002414 sec +reduce (C) time: 0.000076 sec +rate 6.86 million edges/sec (incl time for U=triu(A)) +rate 8.84 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00201183 GB +tricount time: 0.001768 sec (outer product method) +tri+prep time: 0.002098 sec (incl time to compute U) +compute C time: 0.001718 sec +reduce (C) time: 0.000050 sec +rate 10.49 million edges/sec (incl time for U=triu(A)) +rate 12.44 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00201171 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010407/oregon1_010407_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 43998 +nrows 10730 ncols 10730 +time to prune self-edges: 0.000390 +time to build the graph with GrB_Matrix_build: 0.000673 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001056 +matrix 10730 by 10730, 43998 entries, from stdin + +total time to read A matrix: 0.019438 sec + +n 10730 # edges 21999 +U=triu(A) time: 0.000391 sec +read A, create U memory usage: 0.00201386 GB +L=tril(A) time: 0.000419 sec +# triangles 15834 + +tricount time: 0.003239 sec (dot product method) +tri+prep time: 0.004049 sec (incl time to compute L and U) +compute C time: 0.003148 sec +reduce (C) time: 0.000091 sec +rate 5.43 million edges/sec (incl time for U=triu(A)) +rate 6.79 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00201397 GB +tricount time: 0.001914 sec (outer product method) +tri+prep time: 0.002305 sec (incl time to compute U) +compute C time: 0.001852 sec +reduce (C) time: 0.000062 sec +rate 9.54 million edges/sec (incl time for U=triu(A)) +rate 11.49 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00201386 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010414/oregon1_010414_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 44938 +nrows 10791 ncols 10791 +time to prune self-edges: 0.000977 +time to build the graph with GrB_Matrix_build: 0.001359 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.002172 +matrix 10791 by 10791, 44938 entries, from stdin + +total time to read A matrix: 0.022612 sec + +n 10791 # edges 22469 +U=triu(A) time: 
0.000564 sec +read A, create U memory usage: 0.00205014 GB +L=tril(A) time: 0.000768 sec +# triangles 18237 + +tricount time: 0.002585 sec (dot product method) +tri+prep time: 0.003917 sec (incl time to compute L and U) +compute C time: 0.002538 sec +reduce (C) time: 0.000047 sec +rate 5.74 million edges/sec (incl time for U=triu(A)) +rate 8.69 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00205025 GB +tricount time: 0.001534 sec (outer product method) +tri+prep time: 0.002098 sec (incl time to compute U) +compute C time: 0.001487 sec +reduce (C) time: 0.000047 sec +rate 10.71 million edges/sec (incl time for U=triu(A)) +rate 14.65 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00205014 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010421/oregon1_010421_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45494 +nrows 10860 ncols 10860 +time to prune self-edges: 0.000440 +time to build the graph with GrB_Matrix_build: 0.000640 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001077 +matrix 10860 by 10860, 45494 entries, from stdin + +total time to read A matrix: 0.020277 sec + +n 10860 # edges 22747 +U=triu(A) time: 0.000354 sec +read A, create U memory usage: 0.00207291 GB +L=tril(A) time: 0.000345 sec +# triangles 19108 + +tricount time: 0.002439 sec (dot product method) +tri+prep time: 0.003138 sec (incl time to compute L and U) +compute C time: 0.002394 sec +reduce (C) time: 0.000045 sec +rate 7.25 million edges/sec (incl time for U=triu(A)) +rate 9.33 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00207303 GB +tricount time: 0.001503 sec (outer product method) +tri+prep time: 0.001857 sec (incl time to compute U) +compute C time: 0.001459 sec +reduce (C) time: 0.000044 sec +rate 12.25 million edges/sec (incl time for U=triu(A)) +rate 15.13 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00207291 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010428/oregon1_010428_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 44986 +nrows 10887 ncols 10887 +time to prune self-edges: 0.000590 +time to build the graph with GrB_Matrix_build: 0.001449 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001932 +matrix 10887 by 10887, 44986 entries, from stdin + +total time to read A matrix: 0.020313 sec + +n 10887 # edges 22493 +U=triu(A) time: 0.000461 sec +read A, create U memory usage: 0.00205571 GB +L=tril(A) time: 0.000673 sec +# triangles 17645 + +tricount time: 0.004170 sec (dot product method) +tri+prep time: 0.005304 sec (incl time to compute L and U) +compute C time: 0.004110 sec +reduce (C) time: 0.000060 sec +rate 4.24 million edges/sec (incl time for U=triu(A)) +rate 5.39 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00205582 GB +tricount time: 0.001542 sec (outer product method) +tri+prep time: 0.002003 sec (incl time to compute U) +compute C time: 0.001497 sec +reduce (C) time: 0.000045 sec +rate 11.23 million edges/sec (incl time for U=triu(A)) +rate 14.59 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00205571 GB + + +-------------------------------------------------------------------------------- 
+/research/davisgroup/GraphChallenge/snap/oregon1_010505/oregon1_010505_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45214 +nrows 10944 ncols 10944 +time to prune self-edges: 0.000426 +time to build the graph with GrB_Matrix_build: 0.000688 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001103 +matrix 10944 by 10944, 45214 entries, from stdin + +total time to read A matrix: 0.024189 sec + +n 10944 # edges 22607 +U=triu(A) time: 0.000366 sec +read A, create U memory usage: 0.00206619 GB +L=tril(A) time: 0.000374 sec +# triangles 17597 + +tricount time: 0.003353 sec (dot product method) +tri+prep time: 0.004093 sec (incl time to compute L and U) +compute C time: 0.003287 sec +reduce (C) time: 0.000066 sec +rate 5.52 million edges/sec (incl time for U=triu(A)) +rate 6.74 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00206631 GB +tricount time: 0.001795 sec (outer product method) +tri+prep time: 0.002161 sec (incl time to compute U) +compute C time: 0.001750 sec +reduce (C) time: 0.000045 sec +rate 10.46 million edges/sec (incl time for U=triu(A)) +rate 12.59 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00206619 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010512/oregon1_010512_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45354 +nrows 11012 ncols 11012 +time to prune self-edges: 0.000429 +time to build the graph with GrB_Matrix_build: 0.000670 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001009 +matrix 11012 by 11012, 45354 entries, from stdin + +total time to read A matrix: 0.019987 sec + +n 11012 # edges 22677 +U=triu(A) time: 0.000359 sec +read A, create U memory usage: 0.00207395 GB +L=tril(A) time: 0.000347 sec +# triangles 17598 + +tricount time: 0.002364 sec (dot product method) +tri+prep time: 0.003070 sec (incl time to compute L and U) +compute C time: 0.002320 sec +reduce (C) time: 0.000044 sec +rate 7.39 million edges/sec (incl time for U=triu(A)) +rate 9.59 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00207406 GB +tricount time: 0.001424 sec (outer product method) +tri+prep time: 0.001783 sec (incl time to compute U) +compute C time: 0.001383 sec +reduce (C) time: 0.000041 sec +rate 12.72 million edges/sec (incl time for U=triu(A)) +rate 15.92 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00207395 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010519/oregon1_010519_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 45448 +nrows 11052 ncols 11052 +time to prune self-edges: 0.000410 +time to build the graph with GrB_Matrix_build: 0.000710 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000982 +matrix 11052 by 11052, 45448 entries, from stdin + +total time to read A matrix: 0.022128 sec + +n 11052 # edges 22724 +U=triu(A) time: 0.000353 sec +read A, create U memory usage: 0.00207894 GB +L=tril(A) time: 0.000382 sec +# triangles 17677 + +tricount time: 0.002413 sec (dot product method) +tri+prep time: 0.003148 sec (incl time to compute L and U) +compute C time: 0.002369 sec +reduce (C) time: 0.000044 sec +rate 7.22 million edges/sec (incl time for U=triu(A)) +rate 9.42 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00207905 GB 
+tricount time: 0.001388 sec (outer product method) +tri+prep time: 0.001741 sec (incl time to compute U) +compute C time: 0.001348 sec +reduce (C) time: 0.000040 sec +rate 13.05 million edges/sec (incl time for U=triu(A)) +rate 16.37 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00207894 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon1_010526/oregon1_010526_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 46818 +nrows 11175 ncols 11175 +time to prune self-edges: 0.000447 +time to build the graph with GrB_Matrix_build: 0.000779 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001168 +matrix 11175 by 11175, 46818 entries, from stdin + +total time to read A matrix: 0.021523 sec + +n 11175 # edges 23409 +U=triu(A) time: 0.000354 sec +read A, create U memory usage: 0.00213318 GB +L=tril(A) time: 0.000437 sec +# triangles 19894 + +tricount time: 0.002728 sec (dot product method) +tri+prep time: 0.003519 sec (incl time to compute L and U) +compute C time: 0.002680 sec +reduce (C) time: 0.000048 sec +rate 6.65 million edges/sec (incl time for U=triu(A)) +rate 8.58 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00213329 GB +tricount time: 0.001566 sec (outer product method) +tri+prep time: 0.001920 sec (incl time to compute U) +compute C time: 0.001518 sec +reduce (C) time: 0.000048 sec +rate 12.19 million edges/sec (incl time for U=triu(A)) +rate 14.95 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00213318 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010331/oregon2_010331_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 62360 +nrows 10901 ncols 10901 +time to prune self-edges: 0.000738 +time to build the graph with GrB_Matrix_build: 0.001290 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001773 +matrix 10901 by 10901, 62360 entries, from stdin + +total time to read A matrix: 0.026980 sec + +n 10901 # edges 31180 +U=triu(A) time: 0.000392 sec +read A, create U memory usage: 0.00268173 GB +L=tril(A) time: 0.000470 sec +# triangles 82856 + +tricount time: 0.006345 sec (dot product method) +tri+prep time: 0.007207 sec (incl time to compute L and U) +compute C time: 0.006241 sec +reduce (C) time: 0.000104 sec +rate 4.33 million edges/sec (incl time for U=triu(A)) +rate 4.91 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00268184 GB +tricount time: 0.002748 sec (outer product method) +tri+prep time: 0.003140 sec (incl time to compute U) +compute C time: 0.002656 sec +reduce (C) time: 0.000092 sec +rate 9.93 million edges/sec (incl time for U=triu(A)) +rate 11.35 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00268173 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010407/oregon2_010407_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 61710 +nrows 10982 ncols 10982 +time to prune self-edges: 0.000500 +time to build the graph with GrB_Matrix_build: 0.000947 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001347 +matrix 10982 by 10982, 61710 entries, from stdin + +total time to read A matrix: 0.027281 sec + +n 10982 # edges 30855 +U=triu(A) time: 0.000373 sec +read A, create 
U memory usage: 0.00266157 GB +L=tril(A) time: 0.000460 sec +# triangles 78138 + +tricount time: 0.006051 sec (dot product method) +tri+prep time: 0.006884 sec (incl time to compute L and U) +compute C time: 0.005954 sec +reduce (C) time: 0.000097 sec +rate 4.48 million edges/sec (incl time for U=triu(A)) +rate 5.10 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00266168 GB +tricount time: 0.002709 sec (outer product method) +tri+prep time: 0.003082 sec (incl time to compute U) +compute C time: 0.002607 sec +reduce (C) time: 0.000102 sec +rate 10.01 million edges/sec (incl time for U=triu(A)) +rate 11.39 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00266157 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010414/oregon2_010414_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63522 +nrows 11020 ncols 11020 +time to prune self-edges: 0.000558 +time to build the graph with GrB_Matrix_build: 0.000955 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001360 +matrix 11020 by 11020, 63522 entries, from stdin + +total time to read A matrix: 0.028833 sec + +n 11020 # edges 31761 +U=triu(A) time: 0.000462 sec +read A, create U memory usage: 0.00272832 GB +L=tril(A) time: 0.000441 sec +# triangles 88905 + +tricount time: 0.006651 sec (dot product method) +tri+prep time: 0.007554 sec (incl time to compute L and U) +compute C time: 0.006548 sec +reduce (C) time: 0.000103 sec +rate 4.20 million edges/sec (incl time for U=triu(A)) +rate 4.78 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00272843 GB +tricount time: 0.002606 sec (outer product method) +tri+prep time: 0.003068 sec (incl time to compute U) +compute C time: 0.002515 sec +reduce (C) time: 0.000091 sec +rate 10.35 million edges/sec (incl time for U=triu(A)) +rate 12.19 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00272832 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010421/oregon2_010421_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63076 +nrows 11081 ncols 11081 +time to prune self-edges: 0.000470 +time to build the graph with GrB_Matrix_build: 0.001295 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001562 +matrix 11081 by 11081, 63076 entries, from stdin + +total time to read A matrix: 0.028310 sec + +n 11081 # edges 31538 +U=triu(A) time: 0.000388 sec +read A, create U memory usage: 0.0027147 GB +L=tril(A) time: 0.000439 sec +# triangles 82129 + +tricount time: 0.006299 sec (dot product method) +tri+prep time: 0.007126 sec (incl time to compute L and U) +compute C time: 0.006200 sec +reduce (C) time: 0.000099 sec +rate 4.43 million edges/sec (incl time for U=triu(A)) +rate 5.01 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00271482 GB +tricount time: 0.002534 sec (outer product method) +tri+prep time: 0.002922 sec (incl time to compute U) +compute C time: 0.002445 sec +reduce (C) time: 0.000089 sec +rate 10.79 million edges/sec (incl time for U=triu(A)) +rate 12.45 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0027147 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010428/oregon2_010428_adj.tsv.gz 
+-------------------------------------------------------------- +ntuples: 62868 +nrows 11114 ncols 11114 +time to prune self-edges: 0.000802 +time to build the graph with GrB_Matrix_build: 0.001211 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001888 +matrix 11114 by 11114, 62868 entries, from stdin + +total time to read A matrix: 0.028803 sec + +n 11114 # edges 31434 +U=triu(A) time: 0.000474 sec +read A, create U memory usage: 0.00270854 GB +L=tril(A) time: 0.000466 sec +# triangles 78000 + +tricount time: 0.006181 sec (dot product method) +tri+prep time: 0.007121 sec (incl time to compute L and U) +compute C time: 0.006084 sec +reduce (C) time: 0.000097 sec +rate 4.41 million edges/sec (incl time for U=triu(A)) +rate 5.09 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00270865 GB +tricount time: 0.002797 sec (outer product method) +tri+prep time: 0.003271 sec (incl time to compute U) +compute C time: 0.002709 sec +reduce (C) time: 0.000088 sec +rate 9.61 million edges/sec (incl time for U=triu(A)) +rate 11.24 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00270854 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010505/oregon2_010505_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 61886 +nrows 11158 ncols 11158 +time to prune self-edges: 0.000630 +time to build the graph with GrB_Matrix_build: 0.001162 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001693 +matrix 11158 by 11158, 61886 entries, from stdin + +total time to read A matrix: 0.029585 sec + +n 11158 # edges 30943 +U=triu(A) time: 0.000388 sec +read A, create U memory usage: 0.00267495 GB +L=tril(A) time: 0.000452 sec +# triangles 72182 + +tricount time: 0.005858 sec (dot product method) +tri+prep time: 0.006698 sec (incl time to compute L and U) +compute C time: 0.005765 sec +reduce (C) time: 0.000093 sec +rate 4.62 million edges/sec (incl time for U=triu(A)) +rate 5.28 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00267506 GB +tricount time: 0.002579 sec (outer product method) +tri+prep time: 0.002967 sec (incl time to compute U) +compute C time: 0.002478 sec +reduce (C) time: 0.000101 sec +rate 10.43 million edges/sec (incl time for U=triu(A)) +rate 12.00 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00267495 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010512/oregon2_010512_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 62606 +nrows 11261 ncols 11261 +time to prune self-edges: 0.000754 +time to build the graph with GrB_Matrix_build: 0.001433 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001551 +matrix 11261 by 11261, 62606 entries, from stdin + +total time to read A matrix: 0.029418 sec + +n 11261 # edges 31303 +U=triu(A) time: 0.000446 sec +read A, create U memory usage: 0.00270499 GB +L=tril(A) time: 0.000483 sec +# triangles 72866 + +tricount time: 0.006048 sec (dot product method) +tri+prep time: 0.006977 sec (incl time to compute L and U) +compute C time: 0.005937 sec +reduce (C) time: 0.000111 sec +rate 4.49 million edges/sec (incl time for U=triu(A)) +rate 5.18 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0027051 GB +tricount time: 0.002596 sec (outer product method) +tri+prep time: 0.003042 sec (incl 
time to compute U) +compute C time: 0.002510 sec +reduce (C) time: 0.000086 sec +rate 10.29 million edges/sec (incl time for U=triu(A)) +rate 12.06 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00270499 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010519/oregon2_010519_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 64574 +nrows 11376 ncols 11376 +time to prune self-edges: 0.001143 +time to build the graph with GrB_Matrix_build: 0.001664 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001735 +matrix 11376 by 11376, 64574 entries, from stdin + +total time to read A matrix: 0.032550 sec + +n 11376 # edges 32287 +U=triu(A) time: 0.000493 sec +read A, create U memory usage: 0.00278043 GB +L=tril(A) time: 0.000586 sec +# triangles 83709 + +tricount time: 0.008705 sec (dot product method) +tri+prep time: 0.009784 sec (incl time to compute L and U) +compute C time: 0.008560 sec +reduce (C) time: 0.000145 sec +rate 3.30 million edges/sec (incl time for U=triu(A)) +rate 3.71 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00278055 GB +tricount time: 0.003601 sec (outer product method) +tri+prep time: 0.004094 sec (incl time to compute U) +compute C time: 0.003442 sec +reduce (C) time: 0.000159 sec +rate 7.89 million edges/sec (incl time for U=triu(A)) +rate 8.97 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00278043 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/oregon2_010526/oregon2_010526_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 65460 +nrows 11462 ncols 11462 +time to prune self-edges: 0.000625 +time to build the graph with GrB_Matrix_build: 0.001647 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.002518 +matrix 11462 by 11462, 65460 entries, from stdin + +total time to read A matrix: 0.028377 sec + +n 11462 # edges 32730 +U=triu(A) time: 0.000547 sec +read A, create U memory usage: 0.00281577 GB +L=tril(A) time: 0.000737 sec +# triangles 89541 + +tricount time: 0.007340 sec (dot product method) +tri+prep time: 0.008624 sec (incl time to compute L and U) +compute C time: 0.007231 sec +reduce (C) time: 0.000109 sec +rate 3.80 million edges/sec (incl time for U=triu(A)) +rate 4.46 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00281588 GB +tricount time: 0.003047 sec (outer product method) +tri+prep time: 0.003594 sec (incl time to compute U) +compute C time: 0.002937 sec +reduce (C) time: 0.000110 sec +rate 9.11 million edges/sec (incl time for U=triu(A)) +rate 10.74 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00281577 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella04/p2p-Gnutella04_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 79988 +nrows 10877 ncols 10877 +time to prune self-edges: 0.001187 +time to build the graph with GrB_Matrix_build: 0.002092 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.003985 +matrix 10877 by 10877, 79988 entries, from stdin + +total time to read A matrix: 0.041534 sec + +n 10877 # edges 39994 +U=triu(A) time: 0.001157 sec +read A, create U memory usage: 0.00331538 GB +L=tril(A) time: 0.000763 sec +# triangles 934 + +tricount 
time: 0.002664 sec (dot product method) +tri+prep time: 0.004584 sec (incl time to compute L and U) +compute C time: 0.002649 sec +reduce (C) time: 0.000015 sec +rate 8.72 million edges/sec (incl time for U=triu(A)) +rate 15.01 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00331549 GB +tricount time: 0.001082 sec (outer product method) +tri+prep time: 0.002239 sec (incl time to compute U) +compute C time: 0.001071 sec +reduce (C) time: 0.000011 sec +rate 17.86 million edges/sec (incl time for U=triu(A)) +rate 36.96 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00331538 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella05/p2p-Gnutella05_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63678 +nrows 8847 ncols 8847 +time to prune self-edges: 0.000447 +time to build the graph with GrB_Matrix_build: 0.000861 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.002284 +matrix 8847 by 8847, 63678 entries, from stdin + +total time to read A matrix: 0.026085 sec + +n 8847 # edges 31839 +U=triu(A) time: 0.000481 sec +read A, create U memory usage: 0.00264702 GB +L=tril(A) time: 0.000454 sec +# triangles 1112 + +tricount time: 0.002412 sec (dot product method) +tri+prep time: 0.003347 sec (incl time to compute L and U) +compute C time: 0.002396 sec +reduce (C) time: 0.000016 sec +rate 9.51 million edges/sec (incl time for U=triu(A)) +rate 13.20 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00264713 GB +tricount time: 0.000837 sec (outer product method) +tri+prep time: 0.001318 sec (incl time to compute U) +compute C time: 0.000830 sec +reduce (C) time: 0.000007 sec +rate 24.16 million edges/sec (incl time for U=triu(A)) +rate 38.04 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00264702 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella06/p2p-Gnutella06_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63050 +nrows 8718 ncols 8718 +time to prune self-edges: 0.000801 +time to build the graph with GrB_Matrix_build: 0.001160 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001851 +matrix 8718 by 8718, 63050 entries, from stdin + +total time to read A matrix: 0.030651 sec + +n 8718 # edges 31525 +U=triu(A) time: 0.000432 sec +read A, create U memory usage: 0.00261925 GB +L=tril(A) time: 0.000439 sec +# triangles 1142 + +tricount time: 0.001987 sec (dot product method) +tri+prep time: 0.002858 sec (incl time to compute L and U) +compute C time: 0.001975 sec +reduce (C) time: 0.000012 sec +rate 11.03 million edges/sec (incl time for U=triu(A)) +rate 15.87 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00261936 GB +tricount time: 0.001066 sec (outer product method) +tri+prep time: 0.001498 sec (incl time to compute U) +compute C time: 0.001051 sec +reduce (C) time: 0.000015 sec +rate 21.04 million edges/sec (incl time for U=triu(A)) +rate 29.57 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00261925 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella08/p2p-Gnutella08_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 41554 +nrows 6302 ncols 6302 +time to 
prune self-edges: 0.000442 +time to build the graph with GrB_Matrix_build: 0.000606 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.000993 +matrix 6302 by 6302, 41554 entries, from stdin + +total time to read A matrix: 0.016642 sec + +n 6302 # edges 20777 +U=triu(A) time: 0.000352 sec +read A, create U memory usage: 0.00174875 GB +L=tril(A) time: 0.000328 sec +# triangles 2383 + +tricount time: 0.001706 sec (dot product method) +tri+prep time: 0.002386 sec (incl time to compute L and U) +compute C time: 0.001689 sec +reduce (C) time: 0.000017 sec +rate 8.71 million edges/sec (incl time for U=triu(A)) +rate 12.18 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00174887 GB +tricount time: 0.000680 sec (outer product method) +tri+prep time: 0.001032 sec (incl time to compute U) +compute C time: 0.000666 sec +reduce (C) time: 0.000014 sec +rate 20.13 million edges/sec (incl time for U=triu(A)) +rate 30.55 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00174875 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella09/p2p-Gnutella09_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 52026 +nrows 8115 ncols 8115 +time to prune self-edges: 0.000542 +time to build the graph with GrB_Matrix_build: 0.000858 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.001280 +matrix 8115 by 8115, 52026 entries, from stdin + +total time to read A matrix: 0.025467 sec + +n 8115 # edges 26013 +U=triu(A) time: 0.000348 sec +read A, create U memory usage: 0.00219827 GB +L=tril(A) time: 0.000369 sec +# triangles 2354 + +tricount time: 0.001726 sec (dot product method) +tri+prep time: 0.002443 sec (incl time to compute L and U) +compute C time: 0.001711 sec +reduce (C) time: 0.000015 sec +rate 10.65 million edges/sec (incl time for U=triu(A)) +rate 15.07 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00219838 GB +tricount time: 0.000725 sec (outer product method) +tri+prep time: 0.001073 sec (incl time to compute U) +compute C time: 0.000712 sec +reduce (C) time: 0.000013 sec +rate 24.24 million edges/sec (incl time for U=triu(A)) +rate 35.88 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00219827 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella24/p2p-Gnutella24_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 130738 +nrows 26519 ncols 26519 +time to prune self-edges: 0.000926 +time to build the graph with GrB_Matrix_build: 0.002056 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.005171 +matrix 26519 by 26519, 130738 entries, from stdin + +total time to read A matrix: 0.058763 sec + +n 26519 # edges 65369 +U=triu(A) time: 0.001766 sec +read A, create U memory usage: 0.00576806 GB +L=tril(A) time: 0.002073 sec +# triangles 986 + +tricount time: 0.004597 sec (dot product method) +tri+prep time: 0.008436 sec (incl time to compute L and U) +compute C time: 0.004580 sec +reduce (C) time: 0.000017 sec +rate 7.75 million edges/sec (incl time for U=triu(A)) +rate 14.22 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00576817 GB +tricount time: 0.001650 sec (outer product method) +tri+prep time: 0.003416 sec (incl time to compute U) +compute C time: 0.001640 sec +reduce (C) time: 0.000010 sec +rate 19.14 million edges/sec (incl time 
for U=triu(A)) +rate 39.62 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00576806 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella25/p2p-Gnutella25_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 109410 +nrows 22688 ncols 22688 +time to prune self-edges: 0.001459 +time to build the graph with GrB_Matrix_build: 0.003332 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.005903 +matrix 22688 by 22688, 109410 entries, from stdin + +total time to read A matrix: 0.051939 sec + +n 22688 # edges 54705 +U=triu(A) time: 0.000750 sec +read A, create U memory usage: 0.00484701 GB +L=tril(A) time: 0.000981 sec +# triangles 806 + +tricount time: 0.002698 sec (dot product method) +tri+prep time: 0.004429 sec (incl time to compute L and U) +compute C time: 0.002686 sec +reduce (C) time: 0.000012 sec +rate 12.35 million edges/sec (incl time for U=triu(A)) +rate 20.28 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00484712 GB +tricount time: 0.001360 sec (outer product method) +tri+prep time: 0.002110 sec (incl time to compute U) +compute C time: 0.001352 sec +reduce (C) time: 0.000008 sec +rate 25.93 million edges/sec (incl time for U=triu(A)) +rate 40.22 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00484701 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella30/p2p-Gnutella30_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 176656 +nrows 36683 ncols 36683 +time to prune self-edges: 0.002554 +time to build the graph with GrB_Matrix_build: 0.003621 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.004834 +matrix 36683 by 36683, 176656 entries, from stdin + +total time to read A matrix: 0.080749 sec + +n 36683 # edges 88328 +U=triu(A) time: 0.001157 sec +read A, create U memory usage: 0.00782767 GB +L=tril(A) time: 0.001487 sec +# triangles 1590 + +tricount time: 0.004600 sec (dot product method) +tri+prep time: 0.007244 sec (incl time to compute L and U) +compute C time: 0.004582 sec +reduce (C) time: 0.000018 sec +rate 12.19 million edges/sec (incl time for U=triu(A)) +rate 19.20 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.00782778 GB +tricount time: 0.002222 sec (outer product method) +tri+prep time: 0.003379 sec (incl time to compute U) +compute C time: 0.002208 sec +reduce (C) time: 0.000014 sec +rate 26.14 million edges/sec (incl time for U=triu(A)) +rate 39.75 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.00782767 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/p2p-Gnutella31/p2p-Gnutella31_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 295784 +nrows 62587 ncols 62587 +time to prune self-edges: 0.004239 +time to build the graph with GrB_Matrix_build: 0.004840 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.008114 +matrix 62587 by 62587, 295784 entries, from stdin + +total time to read A matrix: 0.132495 sec + +n 62587 # edges 147892 +U=triu(A) time: 0.002101 sec +read A, create U memory usage: 0.0131524 GB +L=tril(A) time: 0.002458 sec +# triangles 2024 + +tricount time: 0.008683 sec (dot product method) +tri+prep time: 0.013242 sec (incl time to compute L and U) +compute C 
time: 0.008659 sec +reduce (C) time: 0.000024 sec +rate 11.17 million edges/sec (incl time for U=triu(A)) +rate 17.03 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0131525 GB +tricount time: 0.003919 sec (outer product method) +tri+prep time: 0.006020 sec (incl time to compute U) +compute C time: 0.003903 sec +reduce (C) time: 0.000016 sec +rate 24.57 million edges/sec (incl time for U=triu(A)) +rate 37.74 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0131524 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-CA/roadNet-CA_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 5533214 +nrows 1965207 ncols 1965207 +time to prune self-edges: 0.075274 +time to build the graph with GrB_Matrix_build: 0.100845 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.171370 +matrix 1965207 by 1965207, 5533214 entries, from stdin + +total time to read A matrix: 2.554279 sec + +n 1965207 # edges 2766607 +U=triu(A) time: 0.060409 sec +read A, create U memory usage: 0.277805 GB +L=tril(A) time: 0.064857 sec +# triangles 120676 + +tricount time: 0.072494 sec (dot product method) +tri+prep time: 0.197760 sec (incl time to compute L and U) +compute C time: 0.070372 sec +reduce (C) time: 0.002122 sec +rate 13.99 million edges/sec (incl time for U=triu(A)) +rate 38.16 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.277805 GB +tricount time: 0.208712 sec (outer product method) +tri+prep time: 0.269121 sec (incl time to compute U) +compute C time: 0.203091 sec +reduce (C) time: 0.005621 sec +rate 10.28 million edges/sec (incl time for U=triu(A)) +rate 13.26 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.314854 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-PA/roadNet-PA_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 3083796 +nrows 1088093 ncols 1088093 +time to prune self-edges: 0.037057 +time to build the graph with GrB_Matrix_build: 0.055761 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.107731 +matrix 1088093 by 1088093, 3083796 entries, from stdin + +total time to read A matrix: 1.362048 sec + +n 1088093 # edges 1541898 +U=triu(A) time: 0.034091 sec +read A, create U memory usage: 0.154541 GB +L=tril(A) time: 0.042679 sec +# triangles 67150 + +tricount time: 0.040433 sec (dot product method) +tri+prep time: 0.117203 sec (incl time to compute L and U) +compute C time: 0.038600 sec +reduce (C) time: 0.001833 sec +rate 13.16 million edges/sec (incl time for U=triu(A)) +rate 38.13 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.154541 GB +tricount time: 0.125185 sec (outer product method) +tri+prep time: 0.159276 sec (incl time to compute U) +compute C time: 0.122126 sec +reduce (C) time: 0.003059 sec +rate 9.68 million edges/sec (incl time for U=triu(A)) +rate 12.32 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.174968 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/snap/roadNet-TX/roadNet-TX_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 3843320 +nrows 1379918 ncols 1379918 +time to prune self-edges: 0.038438 +time to build the graph with GrB_Matrix_build: 0.067983 +make 
symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.119627 +matrix 1379918 by 1379918, 3843320 entries, from stdin + +total time to read A matrix: 1.743967 sec + +n 1379918 # edges 1921660 +U=triu(A) time: 0.044294 sec +read A, create U memory usage: 0.193557 GB +L=tril(A) time: 0.045826 sec +# triangles 82869 + +tricount time: 0.048445 sec (dot product method) +tri+prep time: 0.138565 sec (incl time to compute L and U) +compute C time: 0.046511 sec +reduce (C) time: 0.001934 sec +rate 13.87 million edges/sec (incl time for U=triu(A)) +rate 39.67 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.193557 GB +tricount time: 0.146599 sec (outer product method) +tri+prep time: 0.190893 sec (incl time to compute U) +compute C time: 0.142803 sec +reduce (C) time: 0.003796 sec +rate 10.07 million edges/sec (incl time for U=triu(A)) +rate 13.11 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.21841 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-1045506-262144_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 2091012 +nrows 262145 ncols 262145 +time to prune self-edges: 0.020682 +time to build the graph with GrB_Matrix_build: 0.032536 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.044040 +matrix 262145 by 262145, 2091012 entries, from stdin + +total time to read A matrix: 0.892081 sec + +n 262145 # edges 1045506 +U=triu(A) time: 0.010818 sec +read A, create U memory usage: 0.085763 GB +L=tril(A) time: 0.011947 sec +# triangles 1044484 + +tricount time: 0.025458 sec (dot product method) +tri+prep time: 0.048223 sec (incl time to compute L and U) +compute C time: 0.022125 sec +reduce (C) time: 0.003333 sec +rate 21.68 million edges/sec (incl time for U=triu(A)) +rate 41.07 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0857631 GB +tricount time: 0.022758 sec (outer product method) +tri+prep time: 0.033576 sec (incl time to compute U) +compute C time: 0.016410 sec +reduce (C) time: 0.006348 sec +rate 31.14 million edges/sec (incl time for U=triu(A)) +rate 45.94 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.085763 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-16764930-4194304_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 33529860 +nrows 4194305 ncols 4194305 +time to prune self-edges: 0.258325 +time to build the graph with GrB_Matrix_build: 0.478826 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.719983 +matrix 4194305 by 4194305, 33529860 entries, from stdin + +total time to read A matrix: 14.923557 sec + +n 4194305 # edges 16764930 +U=triu(A) time: 0.207199 sec +read A, create U memory usage: 1.37485 GB +L=tril(A) time: 0.239114 sec +# triangles 16760836 + +tricount time: 0.408435 sec (dot product method) +tri+prep time: 0.854748 sec (incl time to compute L and U) +compute C time: 0.333277 sec +reduce (C) time: 0.075158 sec +rate 19.61 million edges/sec (incl time for U=triu(A)) +rate 41.05 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 1.37485 GB +tricount time: 0.333839 sec (outer product method) +tri+prep time: 0.541038 sec (incl time to compute U) +compute C time: 0.260293 sec +reduce (C) time: 0.073546 sec +rate 30.99 million edges/sec (incl time for U=triu(A)) +rate 50.22 million 
edges/sec (just tricount itself) + +tricount (outer) memory usage: 1.37485 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-260610-65536_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 521220 +nrows 65537 ncols 65537 +time to prune self-edges: 0.004806 +time to build the graph with GrB_Matrix_build: 0.006935 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.008959 +matrix 65537 by 65537, 521220 entries, from stdin + +total time to read A matrix: 0.223239 sec + +n 65537 # edges 260610 +U=triu(A) time: 0.002305 sec +read A, create U memory usage: 0.0213861 GB +L=tril(A) time: 0.003366 sec +# triangles 260100 + +tricount time: 0.005988 sec (dot product method) +tri+prep time: 0.011659 sec (incl time to compute L and U) +compute C time: 0.005028 sec +reduce (C) time: 0.000960 sec +rate 22.35 million edges/sec (incl time for U=triu(A)) +rate 43.52 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.0213862 GB +tricount time: 0.004570 sec (outer product method) +tri+prep time: 0.006875 sec (incl time to compute U) +compute C time: 0.003651 sec +reduce (C) time: 0.000919 sec +rate 37.91 million edges/sec (incl time for U=triu(A)) +rate 57.03 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.0213861 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-268386306-67108864_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 536772612 +nrows 67108865 ncols 67108865 +time to prune self-edges: 4.847868 +time to build the graph with GrB_Matrix_build: 27.130811 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 15.864716 +matrix 67108865 by 67108865, 536772612 entries, from stdin + +total time to read A matrix: 275.608749 sec + +n 67108865 # edges 268386306 +U=triu(A) time: 3.626250 sec +read A, create U memory usage: 22.0082 GB +L=tril(A) time: 4.967097 sec +# triangles 268369924 + +tricount time: 9.462760 sec (dot product method) +tri+prep time: 18.056107 sec (incl time to compute L and U) +compute C time: 8.443604 sec +reduce (C) time: 1.019156 sec +rate 14.86 million edges/sec (incl time for U=triu(A)) +rate 28.36 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 22.0082 GB +tricount time: 5.551354 sec (outer product method) +tri+prep time: 9.177604 sec (incl time to compute U) +compute C time: 4.574764 sec +reduce (C) time: 0.976590 sec +rate 29.24 million edges/sec (incl time for U=triu(A)) +rate 48.35 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 22.0082 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/image-grid/g-4188162-1048576_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 8376324 +nrows 1048577 ncols 1048577 +time to prune self-edges: 0.078030 +time to build the graph with GrB_Matrix_build: 0.113138 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.167927 +matrix 1048577 by 1048577, 8376324 entries, from stdin + +total time to read A matrix: 3.495200 sec + +n 1048577 # edges 4188162 +U=triu(A) time: 0.054021 sec +read A, create U memory usage: 0.343491 GB +L=tril(A) time: 0.056426 sec +# triangles 4186116 + +tricount time: 0.104686 sec (dot product method) +tri+prep time: 0.215133 sec (incl time to compute L 
and U) +compute C time: 0.088667 sec +reduce (C) time: 0.016019 sec +rate 19.47 million edges/sec (incl time for U=triu(A)) +rate 40.01 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.343492 GB +tricount time: 0.083969 sec (outer product method) +tri+prep time: 0.137990 sec (incl time to compute U) +compute C time: 0.060223 sec +reduce (C) time: 0.023746 sec +rate 30.35 million edges/sec (incl time for U=triu(A)) +rate 49.88 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.343491 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/DIMACS10/hugebubbles-00020_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63580358 +nrows 21198120 ncols 21198120 +time to prune self-edges: 0.427916 +time to build the graph with GrB_Matrix_build: 0.915098 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 5.492616 +matrix 21198120 by 21198120, 63580358 entries, from stdin + +total time to read A matrix: 32.635390 sec + +n 21198120 # edges 31790179 +U=triu(A) time: 0.596401 sec +read A, create U memory usage: 3.13682 GB +L=tril(A) time: 0.609073 sec +# triangles 0 + +tricount time: 2.211278 sec (dot product method) +tri+prep time: 3.416752 sec (incl time to compute L and U) +compute C time: 2.197254 sec +reduce (C) time: 0.014024 sec +rate 9.30 million edges/sec (incl time for U=triu(A)) +rate 14.38 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.13682 GB +tricount time: 5.971539 sec (outer product method) +tri+prep time: 6.567940 sec (incl time to compute U) +compute C time: 5.959543 sec +reduce (C) time: 0.011996 sec +rate 4.84 million edges/sec (incl time for U=triu(A)) +rate 5.32 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.39831 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/vanHeukelum/cage15_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 94044692 +nrows 5154860 ncols 5154860 +time to prune self-edges: 0.584438 +time to build the graph with GrB_Matrix_build: 1.334187 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 2.717755 +matrix 5154860 by 5154860, 94044692 entries, from stdin + +total time to read A matrix: 42.787545 sec + +n 5154860 # edges 47022346 +U=triu(A) time: 0.567069 sec +read A, create U memory usage: 3.5918 GB +L=tril(A) time: 0.625895 sec +# triangles 36106416 + +tricount time: 3.214097 sec (dot product method) +tri+prep time: 4.407061 sec (incl time to compute L and U) +compute C time: 3.049724 sec +reduce (C) time: 0.164373 sec +rate 10.67 million edges/sec (incl time for U=triu(A)) +rate 14.63 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 3.5918 GB +tricount time: 1.876248 sec (outer product method) +tri+prep time: 2.443317 sec (incl time to compute U) +compute C time: 1.706507 sec +reduce (C) time: 0.169741 sec +rate 19.25 million edges/sec (incl time for U=triu(A)) +rate 25.06 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 3.5918 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/ssget/Freescale/circuit5M_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 53967852 +nrows 5558327 ncols 5558327 +time to prune self-edges: 0.406535 +time to build the graph with GrB_Matrix_build: 
0.722260 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 1.163145 +matrix 5558327 by 5558327, 53967852 entries, from stdin + +total time to read A matrix: 24.351769 sec + +n 5558327 # edges 26983926 +U=triu(A) time: 0.344639 sec +read A, create U memory usage: 2.16518 GB +L=tril(A) time: 0.327884 sec +# triangles 31019473 + +tricount time: 2.146223 sec (dot product method) +tri+prep time: 2.818746 sec (incl time to compute L and U) +compute C time: 1.992131 sec +reduce (C) time: 0.154092 sec +rate 9.57 million edges/sec (incl time for U=triu(A)) +rate 12.57 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 2.16518 GB +tricount time: 193.796997 sec (outer product method) +tri+prep time: 194.141636 sec (incl time to compute U) +compute C time: 193.645246 sec +reduce (C) time: 0.151751 sec +rate 0.14 million edges/sec (incl time for U=triu(A)) +rate 0.14 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 2.16518 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale18-ef16/graph500-scale18-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 7600696 +nrows 174148 ncols 174148 +time to prune self-edges: 0.081333 +time to build the graph with GrB_Matrix_build: 0.102280 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 0.429054 +matrix 174148 by 174148, 7600696 entries, from stdin + +total time to read A matrix: 3.357724 sec + +n 174148 # edges 3800348 +U=triu(A) time: 0.048870 sec +read A, create U memory usage: 0.280592 GB +L=tril(A) time: 0.041905 sec +# triangles 82287285 + +tricount time: 21.548354 sec (dot product method) +tri+prep time: 21.639129 sec (incl time to compute L and U) +compute C time: 21.527304 sec +reduce (C) time: 0.021050 sec +rate 0.18 million edges/sec (incl time for U=triu(A)) +rate 0.18 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.280592 GB +tricount time: 3.179468 sec (outer product method) +tri+prep time: 3.228338 sec (incl time to compute U) +compute C time: 3.158735 sec +reduce (C) time: 0.020733 sec +rate 1.18 million edges/sec (incl time for U=triu(A)) +rate 1.20 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.280592 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale19-ef16/graph500-scale19-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 15459350 +nrows 335319 ncols 335319 +time to prune self-edges: 0.157659 +time to build the graph with GrB_Matrix_build: 0.203581 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 1.119346 +matrix 335319 by 335319, 15459350 entries, from stdin + +total time to read A matrix: 7.319427 sec + +n 335319 # edges 7729675 +U=triu(A) time: 0.086287 sec +read A, create U memory usage: 0.56995 GB +L=tril(A) time: 0.097857 sec +# triangles 186288972 + +tricount time: 60.416342 sec (dot product method) +tri+prep time: 60.600486 sec (incl time to compute L and U) +compute C time: 60.373811 sec +reduce (C) time: 0.042531 sec +rate 0.13 million edges/sec (incl time for U=triu(A)) +rate 0.13 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.56995 GB +tricount time: 8.271895 sec (outer product method) +tri+prep time: 8.358182 sec (incl time to compute U) +compute C time: 8.226877 sec +reduce (C) time: 0.045018 sec +rate 0.92 million edges/sec (incl 
time for U=triu(A)) +rate 0.93 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 0.56995 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale20-ef16/graph500-scale20-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 31361722 +nrows 645821 ncols 645821 +time to prune self-edges: 0.256138 +time to build the graph with GrB_Matrix_build: 0.417742 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 2.767536 +matrix 645821 by 645821, 31361722 entries, from stdin + +total time to read A matrix: 15.404044 sec + +n 645821 # edges 15680861 +U=triu(A) time: 0.191732 sec +read A, create U memory usage: 1.15486 GB +L=tril(A) time: 0.201807 sec +# triangles 419349784 + +tricount time: 159.881690 sec (dot product method) +tri+prep time: 160.275229 sec (incl time to compute L and U) +compute C time: 159.792297 sec +reduce (C) time: 0.089393 sec +rate 0.10 million edges/sec (incl time for U=triu(A)) +rate 0.10 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 1.15486 GB +tricount time: 23.223368 sec (outer product method) +tri+prep time: 23.415100 sec (incl time to compute U) +compute C time: 23.140289 sec +reduce (C) time: 0.083079 sec +rate 0.67 million edges/sec (incl time for U=triu(A)) +rate 0.68 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 1.15486 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale21-ef16/graph500-scale21-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 63463300 +nrows 1243073 ncols 1243073 +time to prune self-edges: 0.422889 +time to build the graph with GrB_Matrix_build: 0.846625 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 7.622181 +matrix 1243073 by 1243073, 63463300 entries, from stdin + +total time to read A matrix: 33.812114 sec + +n 1243073 # edges 31731650 +U=triu(A) time: 0.358999 sec +read A, create U memory usage: 2.3344 GB +L=tril(A) time: 0.383012 sec +# triangles 935100883 + +tricount time: 408.736400 sec (dot product method) +tri+prep time: 409.478411 sec (incl time to compute L and U) +compute C time: 408.574741 sec +reduce (C) time: 0.161659 sec +rate 0.08 million edges/sec (incl time for U=triu(A)) +rate 0.08 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 2.3344 GB +tricount time: 72.219214 sec (outer product method) +tri+prep time: 72.578213 sec (incl time to compute U) +compute C time: 72.060403 sec +reduce (C) time: 0.158811 sec +rate 0.44 million edges/sec (incl time for U=triu(A)) +rate 0.44 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 2.3344 GB + + +-------------------------------------------------------------------------------- +/research/davisgroup/GraphChallenge/synthetic/graph500-scale22-ef16/graph500-scale22-ef16_adj.tsv.gz +-------------------------------------------------------------- +ntuples: 128194008 +nrows 2393286 ncols 2393286 +time to prune self-edges: 0.682650 +time to build the graph with GrB_Matrix_build: 1.865490 +make symmetric +A = (C+C')/2 +A = (C+C')/2 time 20.992640 +matrix 2393286 by 2393286, 128194008 entries, from stdin + +total time to read A matrix: 77.322002 sec + +n 2393286 # edges 64097004 +U=triu(A) time: 0.784672 sec +read A, create U memory usage: 4.71072 GB +L=tril(A) time: 0.811344 sec +# triangles 2067392370 + 
+tricount time: 1119.452618 sec (dot product method) +tri+prep time: 1121.048634 sec (incl time to compute L and U) +compute C time: 1119.138786 sec +reduce (C) time: 0.313832 sec +rate 0.06 million edges/sec (incl time for U=triu(A)) +rate 0.06 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 4.71072 GB +tricount time: 185.600316 sec (outer product method) +tri+prep time: 186.384988 sec (incl time to compute U) +compute C time: 185.282341 sec +reduce (C) time: 0.317975 sec +rate 0.34 million edges/sec (incl time for U=triu(A)) +rate 0.35 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 4.71072 GB + diff --git a/GraphBLAS/Demo/Output/mis_demo.out b/GraphBLAS/Demo/Output/mis_demo.out index 0d96a7794e..74057dc850 100644 --- a/GraphBLAS/Demo/Output/mis_demo.out +++ b/GraphBLAS/Demo/Output/mis_demo.out @@ -1,22 +1,22 @@ Wathen: nx 4 ny 4 n 65 nz 817 method 0, time: 0.000 sec -MIS time in seconds: 0.000178 +MIS time in seconds: 0.000183 isize: 11 independent set found: 11 of 65 nodes maximal independent set status verified random 5 by 5, nz: 16, method 1 time 0.000 sec -MIS time in seconds: 0.000112 +MIS time in seconds: 0.000137 isize: 2 independent set found: 2 of 5 nodes maximal independent set status verified ntuples: 3 nrows 3 ncols 3 time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000020 +time to build the graph with GrB_Matrix_build: 0.000033 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000025 +A = (C+C')/2 time 0.000018 matrix 3 by 3, 0 entries, from stdin -MIS time in seconds: 0.000062 +MIS time in seconds: 0.000059 isize: 3 independent set found: 3 of 3 nodes maximal independent set status verified @@ -26,166 +26,166 @@ time to prune self-edges: 0.000001 time to build the graph with GrB_Matrix_build: 0.000038 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000037 +A = (C+C')/2 time 0.000039 matrix 4 by 4, 4 entries, from stdin -MIS time in seconds: 0.000100 +MIS time in seconds: 0.000111 isize: 2 independent set found: 2 of 4 nodes maximal independent set status verified ntuples: 10 nrows 4 ncols 4 time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000027 +time to build the graph with GrB_Matrix_build: 0.000040 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000031 +A = (C+C')/2 time 0.000037 matrix 4 by 4, 10 entries, from stdin -MIS time in seconds: 0.000072 +MIS time in seconds: 0.000139 isize: 2 independent set found: 2 of 4 nodes maximal independent set status verified ntuples: 8 nrows 4 ncols 3 -time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000025 +time to prune self-edges: 0.000002 +time to build the graph with GrB_Matrix_build: 0.000040 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000025 +time to construct augmented system: 0.000049 matrix 7 by 7, 16 entries, from stdin -MIS time in seconds: 0.000071 +MIS time in seconds: 0.000134 isize: 3 independent set found: 3 of 7 nodes maximal independent set status verified ntuples: 438 nrows 219 ncols 85 -time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000046 +time to prune self-edges: 0.000002 +time to build the graph with GrB_Matrix_build: 0.000045 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000106 +time to construct augmented system: 0.000103 matrix 304 by 304, 876 entries, from stdin -MIS time in seconds: 0.000282 +MIS time in seconds: 
0.000268 isize: 216 independent set found: 216 of 304 nodes maximal independent set status verified ntuples: 224 nrows 48 ncols 48 -time to prune self-edges: 0.000003 -time to build the graph with GrB_Matrix_build: 0.000051 +time to prune self-edges: 0.000002 +time to build the graph with GrB_Matrix_build: 0.000043 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000056 +A = (C+C')/2 time 0.000111 matrix 48 by 48, 352 entries, from stdin -MIS time in seconds: 0.000168 +MIS time in seconds: 0.000191 isize: 12 independent set found: 12 of 48 nodes maximal independent set status verified ntuples: 147631 nrows 4884 ncols 4884 -time to prune self-edges: 0.003113 -time to build the graph with GrB_Matrix_build: 0.002345 +time to prune self-edges: 0.002110 +time to build the graph with GrB_Matrix_build: 0.001935 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.011046 +A = (C+C')/2 time 0.002082 matrix 4884 by 4884, 285494 entries, from stdin -MIS time in seconds: 0.015651 +MIS time in seconds: 0.013503 isize: 274 independent set found: 274 of 4884 nodes maximal independent set status verified ntuples: 1069 nrows 183 ncols 183 -time to prune self-edges: 0.000003 -time to build the graph with GrB_Matrix_build: 0.000041 +time to prune self-edges: 0.000004 +time to build the graph with GrB_Matrix_build: 0.000039 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000062 +A = (C+C')/2 time 0.000046 matrix 183 by 183, 1402 entries, from stdin -MIS time in seconds: 0.000322 +MIS time in seconds: 0.000280 isize: 88 independent set found: 88 of 183 nodes maximal independent set status verified ntuples: 123 nrows 32 ncols 31 -time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000037 +time to prune self-edges: 0.000000 +time to build the graph with GrB_Matrix_build: 0.000040 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000046 +time to construct augmented system: 0.000060 matrix 63 by 63, 246 entries, from stdin -MIS time in seconds: 0.000186 +MIS time in seconds: 0.000193 isize: 27 independent set found: 27 of 63 nodes maximal independent set status verified ntuples: 123 nrows 31 ncols 32 -time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000035 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000039 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000036 +time to construct augmented system: 0.000058 matrix 63 by 63, 246 entries, from stdin -MIS time in seconds: 0.000144 +MIS time in seconds: 0.000197 isize: 25 independent set found: 25 of 63 nodes maximal independent set status verified ntuples: 102 nrows 27 ncols 51 time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000024 +time to build the graph with GrB_Matrix_build: 0.000036 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000058 +time to construct augmented system: 0.000053 matrix 78 by 78, 204 entries, from stdin -MIS time in seconds: 0.000145 +MIS time in seconds: 0.000160 isize: 49 independent set found: 49 of 78 nodes maximal independent set status verified ntuples: 49920 nrows 492 ncols 490 -time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.001430 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000576 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.005309 +time to construct 
augmented system: 0.003820 matrix 982 by 982, 99840 entries, from stdin -MIS time in seconds: 0.004667 +MIS time in seconds: 0.003526 isize: 329 independent set found: 329 of 982 nodes maximal independent set status verified ntuples: 299 nrows 67 ncols 67 time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000081 +time to build the graph with GrB_Matrix_build: 0.000074 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000081 +A = (C+C')/2 time 0.000058 matrix 67 by 67, 574 entries, from stdin -MIS time in seconds: 0.000221 +MIS time in seconds: 0.000213 isize: 15 independent set found: 15 of 67 nodes maximal independent set status verified -Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.305 sec -MIS time in seconds: 0.143966 +Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.304 sec +MIS time in seconds: 0.119166 isize: 18446 independent set found: 18446 of 120801 nodes maximal independent set status verified -random 10000 by 10000, nz: 199746, method 0 time 0.034 sec -MIS time in seconds: 0.016528 +random 10000 by 10000, nz: 199746, method 0 time 0.039 sec +MIS time in seconds: 0.016474 isize: 1671 independent set found: 1671 of 10000 nodes maximal independent set status verified -random 10000 by 10000, nz: 199746, method 1 time 0.035 sec -MIS time in seconds: 0.017525 +random 10000 by 10000, nz: 199746, method 1 time 0.037 sec +MIS time in seconds: 0.012733 isize: 1671 independent set found: 1671 of 10000 nodes maximal independent set status verified -random 100000 by 100000, nz: 19979838, method 0 time 4.407 sec -MIS time in seconds: 1.224757 +random 100000 by 100000, nz: 19979838, method 0 time 4.438 sec +MIS time in seconds: 1.002717 isize: 2838 independent set found: 2838 of 100000 nodes maximal independent set status verified -random 100000 by 100000, nz: 19979838, method 1 time 3.880 sec -MIS time in seconds: 1.227401 +random 100000 by 100000, nz: 19979838, method 1 time 3.829 sec +MIS time in seconds: 0.911857 isize: 2838 independent set found: 2838 of 100000 nodes maximal independent set status verified diff --git a/GraphBLAS/Demo/Output/simple_demo.out b/GraphBLAS/Demo/Output/simple_demo.out index a3bc48c696..8cdae78272 100644 --- a/GraphBLAS/Demo/Output/simple_demo.out +++ b/GraphBLAS/Demo/Output/simple_demo.out @@ -1,5 +1,5 @@ -time to call simple_tic 1 million times: 0.348619 -time to generate 10 million random numbers: 0.103373 +time to call simple_tic 1 million times: 0.361331 +time to generate 10 million random numbers: 0.102471 first 10 random numbers: 0.513871 0.175726 @@ -11,4 +11,4 @@ first 10 random numbers: 0.226417 0.494766 0.124699 -time to generate 10 million random uint64: 0.085941 +time to generate 10 million random uint64: 0.084502 diff --git a/GraphBLAS/Demo/Output/tri_demo.out b/GraphBLAS/Demo/Output/tri_demo.out index 83219dcb1b..f91aa92484 100644 --- a/GraphBLAS/Demo/Output/tri_demo.out +++ b/GraphBLAS/Demo/Output/tri_demo.out @@ -1,611 +1,651 @@ -------------------------------------------------------------- Wathen: nx 4 ny 4 n 65 nz 817 method 0, time: 0.000 sec -total time to read A matrix: 0.000265 sec +total time to read A matrix: 0.000261 sec n 65 # edges 376 -U=triu(A) time: 0.000026 sec -L=U' time : 0.000012 sec +U=triu(A) time: 0.000013 sec +read A, create U memory usage: 2.9505e-05 GB +L=tril(A) time: 0.000006 sec # triangles 872 -tricount time: 0.000047 sec (outer product method) -tri+prep time: 0.000073 sec (incl time to compute U) -compute C time: 0.000040 sec -reduce (C) time: 0.000007 sec 
-rate 5.15 million edges/sec (incl time for U=triu(A)) -rate 8.00 million edges/sec (just tricount itself) - +tricount time: 0.000037 sec (dot product method) +tri+prep time: 0.000056 sec (incl time to compute L and U) +compute C time: 0.000031 sec +reduce (C) time: 0.000006 sec +rate 6.71 million edges/sec (incl time for U=triu(A)) +rate 10.16 million edges/sec (just tricount itself) -tricount time: 0.000041 sec (dot product method) -tri+prep time: 0.000079 sec (incl time to compute L and U) -compute C time: 0.000036 sec -reduce (C) time: 0.000005 sec -rate 4.76 million edges/sec (incl time for U=triu(A)) -rate 9.17 million edges/sec (just tricount itself) +tricount (dot) memory usage: 2.9984e-05 GB +tricount time: 0.000034 sec (outer product method) +tri+prep time: 0.000047 sec (incl time to compute U) +compute C time: 0.000030 sec +reduce (C) time: 0.000004 sec +rate 8.00 million edges/sec (incl time for U=triu(A)) +rate 11.06 million edges/sec (just tricount itself) +tricount (outer) memory usage: 2.9505e-05 GB -------------------------------------------------------------- random 5 by 5, nz: 16, method 1 time 0.000 sec -total time to read A matrix: 0.000070 sec +total time to read A matrix: 0.000101 sec n 5 # edges 8 -U=triu(A) time: 0.000014 sec -L=U' time : 0.000008 sec +U=triu(A) time: 0.000013 sec +read A, create U memory usage: 1.408e-06 GB +L=tril(A) time: 0.000003 sec # triangles 4 -tricount time: 0.000021 sec (outer product method) -tri+prep time: 0.000035 sec (incl time to compute U) -compute C time: 0.000013 sec -reduce (C) time: 0.000008 sec -rate 0.23 million edges/sec (incl time for U=triu(A)) -rate 0.38 million edges/sec (just tricount itself) - +tricount time: 0.000040 sec (dot product method) +tri+prep time: 0.000056 sec (incl time to compute L and U) +compute C time: 0.000030 sec +reduce (C) time: 0.000010 sec +rate 0.14 million edges/sec (incl time for U=triu(A)) +rate 0.20 million edges/sec (just tricount itself) -tricount time: 0.000007 sec (dot product method) -tri+prep time: 0.000029 sec (incl time to compute L and U) -compute C time: 0.000005 sec -reduce (C) time: 0.000002 sec -rate 0.28 million edges/sec (incl time for U=triu(A)) -rate 1.14 million edges/sec (just tricount itself) +tricount (dot) memory usage: 1.568e-06 GB +tricount time: 0.000024 sec (outer product method) +tri+prep time: 0.000037 sec (incl time to compute U) +compute C time: 0.000023 sec +reduce (C) time: 0.000001 sec +rate 0.22 million edges/sec (incl time for U=triu(A)) +rate 0.33 million edges/sec (just tricount itself) +tricount (outer) memory usage: 1.685e-06 GB -------------------------------------------------------------- ntuples: 3 nrows 3 ncols 3 time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000024 +time to build the graph with GrB_Matrix_build: 0.000047 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000027 +A = (C+C')/2 time 0.000024 matrix 3 by 3, 0 entries, from stdin -total time to read A matrix: 0.000105 sec +total time to read A matrix: 0.000178 sec n 3 # edges 0 -U=triu(A) time: 0.000015 sec -L=U' time : 0.000003 sec +U=triu(A) time: 0.000016 sec +read A, create U memory usage: 8.34e-07 GB +L=tril(A) time: 0.000002 sec # triangles 0 -tricount time: 0.000038 sec (outer product method) -tri+prep time: 0.000053 sec (incl time to compute U) -compute C time: 0.000031 sec -reduce (C) time: 0.000007 sec +tricount time: 0.000070 sec (dot product method) +tri+prep time: 0.000088 sec (incl time to compute L and U) +compute C time: 0.000042 sec 
+reduce (C) time: 0.000028 sec rate 0.00 million edges/sec (incl time for U=triu(A)) rate 0.00 million edges/sec (just tricount itself) - -tricount time: 0.000005 sec (dot product method) -tri+prep time: 0.000023 sec (incl time to compute L and U) -compute C time: 0.000003 sec +tricount (dot) memory usage: 9.4e-07 GB +tricount time: 0.000038 sec (outer product method) +tri+prep time: 0.000054 sec (incl time to compute U) +compute C time: 0.000036 sec reduce (C) time: 0.000002 sec rate 0.00 million edges/sec (incl time for U=triu(A)) rate 0.00 million edges/sec (just tricount itself) +tricount (outer) memory usage: 1.153e-06 GB -------------------------------------------------------------- ntuples: 8 nrows 4 ncols 4 -time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000026 +time to prune self-edges: 0.000002 +time to build the graph with GrB_Matrix_build: 0.000035 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000032 +A = (C+C')/2 time 0.000031 matrix 4 by 4, 4 entries, from stdin -total time to read A matrix: 0.000141 sec +total time to read A matrix: 0.000149 sec n 4 # edges 2 -U=triu(A) time: 0.000014 sec -L=U' time : 0.000005 sec +U=triu(A) time: 0.000006 sec +read A, create U memory usage: 1.033e-06 GB +L=tril(A) time: 0.000002 sec # triangles 0 +tricount time: 0.000018 sec (dot product method) +tri+prep time: 0.000026 sec (incl time to compute L and U) +compute C time: 0.000014 sec +reduce (C) time: 0.000004 sec +rate 0.08 million edges/sec (incl time for U=triu(A)) +rate 0.11 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 1.145e-06 GB tricount time: 0.000016 sec (outer product method) -tri+prep time: 0.000030 sec (incl time to compute U) -compute C time: 0.000011 sec -reduce (C) time: 0.000005 sec -rate 0.07 million edges/sec (incl time for U=triu(A)) +tri+prep time: 0.000022 sec (incl time to compute U) +compute C time: 0.000014 sec +reduce (C) time: 0.000002 sec +rate 0.09 million edges/sec (incl time for U=triu(A)) rate 0.13 million edges/sec (just tricount itself) - -tricount time: 0.000005 sec (dot product method) -tri+prep time: 0.000024 sec (incl time to compute L and U) -compute C time: 0.000004 sec -reduce (C) time: 0.000001 sec -rate 0.08 million edges/sec (incl time for U=triu(A)) -rate 0.40 million edges/sec (just tricount itself) - +tricount (outer) memory usage: 1.317e-06 GB -------------------------------------------------------------- ntuples: 10 nrows 4 ncols 4 -time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000034 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000035 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000052 +A = (C+C')/2 time 0.000032 matrix 4 by 4, 10 entries, from stdin -total time to read A matrix: 0.000173 sec +total time to read A matrix: 0.000150 sec n 4 # edges 5 -U=triu(A) time: 0.000017 sec -L=U' time : 0.000006 sec +U=triu(A) time: 0.000006 sec +read A, create U memory usage: 1.105e-06 GB +L=tril(A) time: 0.000003 sec # triangles 2 -tricount time: 0.000026 sec (outer product method) -tri+prep time: 0.000043 sec (incl time to compute U) -compute C time: 0.000019 sec -reduce (C) time: 0.000007 sec -rate 0.12 million edges/sec (incl time for U=triu(A)) -rate 0.19 million edges/sec (just tricount itself) - - -tricount time: 0.000007 sec (dot product method) +tricount time: 0.000021 sec (dot product method) tri+prep time: 0.000030 sec (incl time to compute L and U) -compute C time: 0.000006 sec -reduce (C) 
time: 0.000001 sec +compute C time: 0.000016 sec +reduce (C) time: 0.000005 sec rate 0.17 million edges/sec (incl time for U=triu(A)) -rate 0.71 million edges/sec (just tricount itself) +rate 0.24 million edges/sec (just tricount itself) +tricount (dot) memory usage: 1.361e-06 GB +tricount time: 0.000015 sec (outer product method) +tri+prep time: 0.000021 sec (incl time to compute U) +compute C time: 0.000014 sec +reduce (C) time: 0.000001 sec +rate 0.24 million edges/sec (incl time for U=triu(A)) +rate 0.33 million edges/sec (just tricount itself) + +tricount (outer) memory usage: 1.473e-06 GB -------------------------------------------------------------- ntuples: 8 nrows 4 ncols 3 time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000031 +time to build the graph with GrB_Matrix_build: 0.000035 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000032 +time to construct augmented system: 0.000044 matrix 7 by 7, 16 entries, from stdin -total time to read A matrix: 0.000147 sec +total time to read A matrix: 0.000166 sec n 7 # edges 8 -U=triu(A) time: 0.000015 sec -L=U' time : 0.000006 sec +U=triu(A) time: 0.000010 sec +read A, create U memory usage: 5.201e-06 GB +L=tril(A) time: 0.000004 sec # triangles 0 -tricount time: 0.000024 sec (outer product method) -tri+prep time: 0.000039 sec (incl time to compute U) -compute C time: 0.000016 sec -reduce (C) time: 0.000008 sec -rate 0.21 million edges/sec (incl time for U=triu(A)) -rate 0.33 million edges/sec (just tricount itself) - +tricount time: 0.000029 sec (dot product method) +tri+prep time: 0.000043 sec (incl time to compute L and U) +compute C time: 0.000022 sec +reduce (C) time: 0.000007 sec +rate 0.19 million edges/sec (incl time for U=triu(A)) +rate 0.28 million edges/sec (just tricount itself) -tricount time: 0.000008 sec (dot product method) -tri+prep time: 0.000029 sec (incl time to compute L and U) -compute C time: 0.000007 sec +tricount (dot) memory usage: 5.201e-06 GB +tricount time: 0.000019 sec (outer product method) +tri+prep time: 0.000029 sec (incl time to compute U) +compute C time: 0.000018 sec reduce (C) time: 0.000001 sec rate 0.28 million edges/sec (incl time for U=triu(A)) -rate 1.00 million edges/sec (just tricount itself) +rate 0.42 million edges/sec (just tricount itself) +tricount (outer) memory usage: 5.201e-06 GB -------------------------------------------------------------- ntuples: 438 nrows 219 ncols 85 -time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000043 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000036 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000087 +time to construct augmented system: 0.000065 matrix 304 by 304, 876 entries, from stdin -total time to read A matrix: 0.000459 sec +total time to read A matrix: 0.000395 sec n 304 # edges 438 -U=triu(A) time: 0.000027 sec -L=U' time : 0.000008 sec +U=triu(A) time: 0.000010 sec +read A, create U memory usage: 3.0745e-05 GB +L=tril(A) time: 0.000005 sec # triangles 0 -tricount time: 0.000035 sec (outer product method) -tri+prep time: 0.000062 sec (incl time to compute U) -compute C time: 0.000028 sec -reduce (C) time: 0.000007 sec -rate 7.06 million edges/sec (incl time for U=triu(A)) -rate 12.51 million edges/sec (just tricount itself) - +tricount time: 0.000027 sec (dot product method) +tri+prep time: 0.000042 sec (incl time to compute L and U) +compute C time: 
0.000021 sec +reduce (C) time: 0.000006 sec +rate 10.43 million edges/sec (incl time for U=triu(A)) +rate 16.22 million edges/sec (just tricount itself) -tricount time: 0.000014 sec (dot product method) -tri+prep time: 0.000049 sec (incl time to compute L and U) -compute C time: 0.000013 sec -reduce (C) time: 0.000001 sec -rate 8.94 million edges/sec (incl time for U=triu(A)) -rate 31.29 million edges/sec (just tricount itself) +tricount (dot) memory usage: 4.3857e-05 GB +tricount time: 0.000036 sec (outer product method) +tri+prep time: 0.000046 sec (incl time to compute U) +compute C time: 0.000033 sec +reduce (C) time: 0.000003 sec +rate 9.52 million edges/sec (incl time for U=triu(A)) +rate 12.17 million edges/sec (just tricount itself) +tricount (outer) memory usage: 4.5081e-05 GB -------------------------------------------------------------- ntuples: 224 nrows 48 ncols 48 -time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000039 +time to prune self-edges: 0.000000 +time to build the graph with GrB_Matrix_build: 0.000031 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000041 +A = (C+C')/2 time 0.000030 matrix 48 by 48, 352 entries, from stdin -total time to read A matrix: 0.000285 sec +total time to read A matrix: 0.000264 sec n 48 # edges 176 -U=triu(A) time: 0.000018 sec -L=U' time : 0.000007 sec +U=triu(A) time: 0.000012 sec +read A, create U memory usage: 1.0657e-05 GB +L=tril(A) time: 0.000008 sec # triangles 160 -tricount time: 0.000026 sec (outer product method) -tri+prep time: 0.000044 sec (incl time to compute U) -compute C time: 0.000019 sec -reduce (C) time: 0.000007 sec -rate 4.00 million edges/sec (incl time for U=triu(A)) -rate 6.77 million edges/sec (just tricount itself) - +tricount time: 0.000043 sec (dot product method) +tri+prep time: 0.000063 sec (incl time to compute L and U) +compute C time: 0.000034 sec +reduce (C) time: 0.000009 sec +rate 2.79 million edges/sec (incl time for U=triu(A)) +rate 4.09 million edges/sec (just tricount itself) -tricount time: 0.000030 sec (dot product method) -tri+prep time: 0.000055 sec (incl time to compute L and U) +tricount (dot) memory usage: 1.5433e-05 GB +tricount time: 0.000030 sec (outer product method) +tri+prep time: 0.000042 sec (incl time to compute U) compute C time: 0.000027 sec reduce (C) time: 0.000003 sec -rate 3.20 million edges/sec (incl time for U=triu(A)) +rate 4.19 million edges/sec (incl time for U=triu(A)) rate 5.87 million edges/sec (just tricount itself) +tricount (outer) memory usage: 1.353e-05 GB -------------------------------------------------------------- ntuples: 147631 nrows 4884 ncols 4884 -time to prune self-edges: 0.003637 -time to build the graph with GrB_Matrix_build: 0.002540 +time to prune self-edges: 0.003612 +time to build the graph with GrB_Matrix_build: 0.002998 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.006897 +A = (C+C')/2 time 0.002823 matrix 4884 by 4884, 285494 entries, from stdin -total time to read A matrix: 0.093778 sec +total time to read A matrix: 0.104883 sec n 4884 # edges 142747 -U=triu(A) time: 0.003440 sec -L=U' time : 0.000934 sec +U=triu(A) time: 0.000979 sec +read A, create U memory usage: 0.00700882 GB +L=tril(A) time: 0.001514 sec # triangles 1512964 -tricount time: 0.023825 sec (outer product method) -tri+prep time: 0.027265 sec (incl time to compute U) -compute C time: 0.023010 sec -reduce (C) time: 0.000815 sec -rate 5.24 million edges/sec (incl time for U=triu(A)) -rate 5.99 million edges/sec (just tricount itself) - 
+tricount time: 0.015935 sec (dot product method) +tri+prep time: 0.018428 sec (incl time to compute L and U) +compute C time: 0.014677 sec +reduce (C) time: 0.001258 sec +rate 7.75 million edges/sec (incl time for U=triu(A)) +rate 8.96 million edges/sec (just tricount itself) -tricount time: 0.037564 sec (dot product method) -tri+prep time: 0.041938 sec (incl time to compute L and U) -compute C time: 0.036396 sec -reduce (C) time: 0.001168 sec -rate 3.40 million edges/sec (incl time for U=triu(A)) -rate 3.80 million edges/sec (just tricount itself) +tricount (dot) memory usage: 0.010474 GB +tricount time: 0.010620 sec (outer product method) +tri+prep time: 0.011599 sec (incl time to compute U) +compute C time: 0.009762 sec +reduce (C) time: 0.000858 sec +rate 12.31 million edges/sec (incl time for U=triu(A)) +rate 13.44 million edges/sec (just tricount itself) +tricount (outer) memory usage: 0.00876607 GB -------------------------------------------------------------- ntuples: 1069 nrows 183 ncols 183 -time to prune self-edges: 0.000003 -time to build the graph with GrB_Matrix_build: 0.000047 +time to prune self-edges: 0.000005 +time to build the graph with GrB_Matrix_build: 0.000066 make symmetric A = (C+C')/2 -A = (C+C')/2 time 0.000070 +A = (C+C')/2 time 0.000069 matrix 183 by 183, 1402 entries, from stdin -total time to read A matrix: 0.000901 sec +total time to read A matrix: 0.000913 sec n 183 # edges 701 -U=triu(A) time: 0.000033 sec -L=U' time : 0.000012 sec +U=triu(A) time: 0.000021 sec +read A, create U memory usage: 4.0177e-05 GB +L=tril(A) time: 0.000020 sec # triangles 863 -tricount time: 0.000084 sec (outer product method) -tri+prep time: 0.000117 sec (incl time to compute U) -compute C time: 0.000069 sec -reduce (C) time: 0.000015 sec -rate 5.99 million edges/sec (incl time for U=triu(A)) -rate 8.35 million edges/sec (just tricount itself) - - -tricount time: 0.000115 sec (dot product method) -tri+prep time: 0.000160 sec (incl time to compute L and U) -compute C time: 0.000110 sec +tricount time: 0.000093 sec (dot product method) +tri+prep time: 0.000134 sec (incl time to compute L and U) +compute C time: 0.000081 sec +reduce (C) time: 0.000012 sec +rate 5.23 million edges/sec (incl time for U=triu(A)) +rate 7.54 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 5.8633e-05 GB +tricount time: 0.000053 sec (outer product method) +tri+prep time: 0.000074 sec (incl time to compute U) +compute C time: 0.000048 sec reduce (C) time: 0.000005 sec -rate 4.38 million edges/sec (incl time for U=triu(A)) -rate 6.10 million edges/sec (just tricount itself) +rate 9.47 million edges/sec (incl time for U=triu(A)) +rate 13.23 million edges/sec (just tricount itself) +tricount (outer) memory usage: 5.0565e-05 GB -------------------------------------------------------------- ntuples: 123 nrows 32 ncols 31 -time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000059 +time to prune self-edges: 0.000000 +time to build the graph with GrB_Matrix_build: 0.000036 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000056 +time to construct augmented system: 0.000052 matrix 63 by 63, 246 entries, from stdin -total time to read A matrix: 0.000313 sec +total time to read A matrix: 0.000231 sec n 63 # edges 123 -U=triu(A) time: 0.000025 sec -L=U' time : 0.000007 sec +U=triu(A) time: 0.000011 sec +read A, create U memory usage: 8.391e-06 GB +L=tril(A) time: 0.000007 sec # triangles 0 -tricount time: 0.000097 sec 
(outer product method) -tri+prep time: 0.000122 sec (incl time to compute U) -compute C time: 0.000079 sec -reduce (C) time: 0.000018 sec -rate 1.01 million edges/sec (incl time for U=triu(A)) -rate 1.27 million edges/sec (just tricount itself) - +tricount time: 0.000031 sec (dot product method) +tri+prep time: 0.000049 sec (incl time to compute L and U) +compute C time: 0.000024 sec +reduce (C) time: 0.000007 sec +rate 2.51 million edges/sec (incl time for U=triu(A)) +rate 3.97 million edges/sec (just tricount itself) -tricount time: 0.000036 sec (dot product method) -tri+prep time: 0.000068 sec (incl time to compute L and U) -compute C time: 0.000034 sec -reduce (C) time: 0.000002 sec -rate 1.81 million edges/sec (incl time for U=triu(A)) -rate 3.42 million edges/sec (just tricount itself) +tricount (dot) memory usage: 1.1969e-05 GB +tricount time: 0.000022 sec (outer product method) +tri+prep time: 0.000033 sec (incl time to compute U) +compute C time: 0.000021 sec +reduce (C) time: 0.000001 sec +rate 3.73 million edges/sec (incl time for U=triu(A)) +rate 5.59 million edges/sec (just tricount itself) +tricount (outer) memory usage: 1.1857e-05 GB -------------------------------------------------------------- ntuples: 123 nrows 31 ncols 32 time to prune self-edges: 0.000000 -time to build the graph with GrB_Matrix_build: 0.000038 +time to build the graph with GrB_Matrix_build: 0.000053 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000043 +time to construct augmented system: 0.000052 matrix 63 by 63, 246 entries, from stdin -total time to read A matrix: 0.000205 sec +total time to read A matrix: 0.000249 sec n 63 # edges 123 -U=triu(A) time: 0.000027 sec -L=U' time : 0.000009 sec +U=triu(A) time: 0.000011 sec +read A, create U memory usage: 8.383e-06 GB +L=tril(A) time: 0.000007 sec # triangles 0 -tricount time: 0.000026 sec (outer product method) -tri+prep time: 0.000053 sec (incl time to compute U) -compute C time: 0.000020 sec -reduce (C) time: 0.000006 sec -rate 2.32 million edges/sec (incl time for U=triu(A)) -rate 4.73 million edges/sec (just tricount itself) - +tricount time: 0.000029 sec (dot product method) +tri+prep time: 0.000047 sec (incl time to compute L and U) +compute C time: 0.000024 sec +reduce (C) time: 0.000005 sec +rate 2.62 million edges/sec (incl time for U=triu(A)) +rate 4.24 million edges/sec (just tricount itself) -tricount time: 0.000010 sec (dot product method) -tri+prep time: 0.000046 sec (incl time to compute L and U) -compute C time: 0.000008 sec -reduce (C) time: 0.000002 sec -rate 2.67 million edges/sec (incl time for U=triu(A)) -rate 12.30 million edges/sec (just tricount itself) +tricount (dot) memory usage: 1.1961e-05 GB +tricount time: 0.000021 sec (outer product method) +tri+prep time: 0.000032 sec (incl time to compute U) +compute C time: 0.000020 sec +reduce (C) time: 0.000001 sec +rate 3.84 million edges/sec (incl time for U=triu(A)) +rate 5.86 million edges/sec (just tricount itself) +tricount (outer) memory usage: 1.1849e-05 GB -------------------------------------------------------------- ntuples: 102 nrows 27 ncols 51 time to prune self-edges: 0.000001 -time to build the graph with GrB_Matrix_build: 0.000029 +time to build the graph with GrB_Matrix_build: 0.000031 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.000072 +time to construct augmented system: 0.000043 matrix 78 by 78, 204 entries, from stdin -total time to read A matrix: 0.000211 sec +total time to 
read A matrix: 0.000207 sec n 78 # edges 102 -U=triu(A) time: 0.000021 sec -L=U' time : 0.000008 sec +U=triu(A) time: 0.000011 sec +read A, create U memory usage: 8.213e-06 GB +L=tril(A) time: 0.000007 sec # triangles 0 -tricount time: 0.000052 sec (outer product method) -tri+prep time: 0.000073 sec (incl time to compute U) -compute C time: 0.000045 sec +tricount time: 0.000030 sec (dot product method) +tri+prep time: 0.000048 sec (incl time to compute L and U) +compute C time: 0.000023 sec reduce (C) time: 0.000007 sec -rate 1.40 million edges/sec (incl time for U=triu(A)) -rate 1.96 million edges/sec (just tricount itself) - +rate 2.12 million edges/sec (incl time for U=triu(A)) +rate 3.40 million edges/sec (just tricount itself) -tricount time: 0.000010 sec (dot product method) -tri+prep time: 0.000039 sec (incl time to compute L and U) -compute C time: 0.000009 sec +tricount (dot) memory usage: 1.0897e-05 GB +tricount time: 0.000025 sec (outer product method) +tri+prep time: 0.000036 sec (incl time to compute U) +compute C time: 0.000024 sec reduce (C) time: 0.000001 sec -rate 2.62 million edges/sec (incl time for U=triu(A)) -rate 10.20 million edges/sec (just tricount itself) +rate 2.83 million edges/sec (incl time for U=triu(A)) +rate 4.08 million edges/sec (just tricount itself) +tricount (outer) memory usage: 1.1193e-05 GB -------------------------------------------------------------- ntuples: 49920 nrows 492 ncols 490 time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000896 +time to build the graph with GrB_Matrix_build: 0.000754 make symmetric A = [0 C ; C' 0], a bipartite graph -time to construct augmented system: 0.005180 +time to construct augmented system: 0.005091 matrix 982 by 982, 99840 entries, from stdin -total time to read A matrix: 0.030604 sec +total time to read A matrix: 0.037856 sec n 982 # edges 49920 -U=triu(A) time: 0.002114 sec -L=U' time : 0.000318 sec +U=triu(A) time: 0.000291 sec +read A, create U memory usage: 0.00270163 GB +L=tril(A) time: 0.000641 sec # triangles 0 -tricount time: 0.000476 sec (outer product method) -tri+prep time: 0.002590 sec (incl time to compute U) -compute C time: 0.000470 sec -reduce (C) time: 0.000006 sec -rate 19.27 million edges/sec (incl time for U=triu(A)) -rate 104.87 million edges/sec (just tricount itself) - +tricount time: 0.000330 sec (dot product method) +tri+prep time: 0.001262 sec (incl time to compute L and U) +compute C time: 0.000325 sec +reduce (C) time: 0.000005 sec +rate 39.56 million edges/sec (incl time for U=triu(A)) +rate 151.27 million edges/sec (just tricount itself) -tricount time: 0.000360 sec (dot product method) -tri+prep time: 0.002792 sec (incl time to compute L and U) -compute C time: 0.000358 sec +tricount (dot) memory usage: 0.00363044 GB +tricount time: 0.000191 sec (outer product method) +tri+prep time: 0.000482 sec (incl time to compute U) +compute C time: 0.000189 sec reduce (C) time: 0.000002 sec -rate 17.88 million edges/sec (incl time for U=triu(A)) -rate 138.67 million edges/sec (just tricount itself) +rate 103.57 million edges/sec (incl time for U=triu(A)) +rate 261.36 million edges/sec (just tricount itself) +tricount (outer) memory usage: 0.00324666 GB -------------------------------------------------------------- ntuples: 299 nrows 67 ncols 67 -time to prune self-edges: 0.000002 -time to build the graph with GrB_Matrix_build: 0.000051 +time to prune self-edges: 0.000001 +time to build the graph with GrB_Matrix_build: 0.000050 make symmetric A = (C+C')/2 
-A = (C+C')/2 time 0.000039 +A = (C+C')/2 time 0.000033 matrix 67 by 67, 574 entries, from stdin -total time to read A matrix: 0.000316 sec +total time to read A matrix: 0.000255 sec n 67 # edges 287 -U=triu(A) time: 0.000024 sec -L=U' time : 0.000009 sec +U=triu(A) time: 0.000009 sec +read A, create U memory usage: 1.6593e-05 GB +L=tril(A) time: 0.000007 sec # triangles 120 -tricount time: 0.000043 sec (outer product method) -tri+prep time: 0.000067 sec (incl time to compute U) -compute C time: 0.000037 sec +tricount time: 0.000031 sec (dot product method) +tri+prep time: 0.000047 sec (incl time to compute L and U) +compute C time: 0.000025 sec reduce (C) time: 0.000006 sec -rate 4.28 million edges/sec (incl time for U=triu(A)) -rate 6.67 million edges/sec (just tricount itself) +rate 6.11 million edges/sec (incl time for U=triu(A)) +rate 9.26 million edges/sec (just tricount itself) - -tricount time: 0.000022 sec (dot product method) -tri+prep time: 0.000055 sec (incl time to compute L and U) +tricount (dot) memory usage: 2.4185e-05 GB +tricount time: 0.000022 sec (outer product method) +tri+prep time: 0.000031 sec (incl time to compute U) compute C time: 0.000020 sec reduce (C) time: 0.000002 sec -rate 5.22 million edges/sec (incl time for U=triu(A)) +rate 9.26 million edges/sec (incl time for U=triu(A)) rate 13.05 million edges/sec (just tricount itself) +tricount (outer) memory usage: 2.0969e-05 GB -------------------------------------------------------------- -Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.305 sec +Wathen: nx 200 ny 200 n 120801 nz 1883201 method 0, time: 0.308 sec -total time to read A matrix: 0.372661 sec +total time to read A matrix: 0.367957 sec n 120801 # edges 881200 -U=triu(A) time: 0.034735 sec -L=U' time : 0.007189 sec +U=triu(A) time: 0.010182 sec +read A, create U memory usage: 0.065216 GB +L=tril(A) time: 0.012222 sec # triangles 2160400 -tricount time: 0.043954 sec (outer product method) -tri+prep time: 0.078689 sec (incl time to compute U) -compute C time: 0.039183 sec -reduce (C) time: 0.004771 sec -rate 11.20 million edges/sec (incl time for U=triu(A)) -rate 20.05 million edges/sec (just tricount itself) - +tricount time: 0.035040 sec (dot product method) +tri+prep time: 0.057444 sec (incl time to compute L and U) +compute C time: 0.030684 sec +reduce (C) time: 0.004356 sec +rate 15.34 million edges/sec (incl time for U=triu(A)) +rate 25.15 million edges/sec (just tricount itself) -tricount time: 0.054829 sec (dot product method) -tri+prep time: 0.096753 sec (incl time to compute L and U) -compute C time: 0.050540 sec -reduce (C) time: 0.004289 sec -rate 9.11 million edges/sec (incl time for U=triu(A)) -rate 16.07 million edges/sec (just tricount itself) +tricount (dot) memory usage: 0.0673129 GB +tricount time: 0.018321 sec (outer product method) +tri+prep time: 0.028503 sec (incl time to compute U) +compute C time: 0.014007 sec +reduce (C) time: 0.004314 sec +rate 30.92 million edges/sec (incl time for U=triu(A)) +rate 48.10 million edges/sec (just tricount itself) +tricount (outer) memory usage: 0.065216 GB -------------------------------------------------------------- -random 10000 by 10000, nz: 199746, method 0 time 0.035 sec +random 10000 by 10000, nz: 199746, method 0 time 0.039 sec -total time to read A matrix: 0.037346 sec +total time to read A matrix: 0.041646 sec n 10000 # edges 99873 -U=triu(A) time: 0.003833 sec -L=U' time : 0.003316 sec +U=triu(A) time: 0.001691 sec +read A, create U memory usage: 0.00805162 GB +L=tril(A) 
time: 0.002014 sec # triangles 1301 -tricount time: 0.005017 sec (outer product method) -tri+prep time: 0.008850 sec (incl time to compute U) -compute C time: 0.004924 sec -reduce (C) time: 0.000093 sec -rate 11.29 million edges/sec (incl time for U=triu(A)) -rate 19.91 million edges/sec (just tricount itself) +tricount time: 0.020875 sec (dot product method) +tri+prep time: 0.024580 sec (incl time to compute L and U) +compute C time: 0.020833 sec +reduce (C) time: 0.000042 sec +rate 4.06 million edges/sec (incl time for U=triu(A)) +rate 4.78 million edges/sec (just tricount itself) +tricount (dot) memory usage: 0.00805162 GB +tricount time: 0.004295 sec (outer product method) +tri+prep time: 0.005986 sec (incl time to compute U) +compute C time: 0.004256 sec +reduce (C) time: 0.000039 sec +rate 16.68 million edges/sec (incl time for U=triu(A)) +rate 23.25 million edges/sec (just tricount itself) -tricount time: 0.013202 sec (dot product method) -tri+prep time: 0.020351 sec (incl time to compute L and U) -compute C time: 0.013192 sec -reduce (C) time: 0.000010 sec -rate 4.91 million edges/sec (incl time for U=triu(A)) -rate 7.56 million edges/sec (just tricount itself) - +tricount (outer) memory usage: 0.00805162 GB -------------------------------------------------------------- -random 10000 by 10000, nz: 199746, method 1 time 0.030 sec +random 10000 by 10000, nz: 199746, method 1 time 0.032 sec -total time to read A matrix: 0.031984 sec +total time to read A matrix: 0.033590 sec n 10000 # edges 99873 -U=triu(A) time: 0.003223 sec -L=U' time : 0.003558 sec +U=triu(A) time: 0.001373 sec +read A, create U memory usage: 0.00583355 GB +L=tril(A) time: 0.001344 sec # triangles 1301 -tricount time: 0.005279 sec (outer product method) -tri+prep time: 0.008502 sec (incl time to compute U) -compute C time: 0.005249 sec -reduce (C) time: 0.000030 sec -rate 11.75 million edges/sec (incl time for U=triu(A)) -rate 18.92 million edges/sec (just tricount itself) +tricount time: 0.017314 sec (dot product method) +tri+prep time: 0.020031 sec (incl time to compute L and U) +compute C time: 0.017276 sec +reduce (C) time: 0.000038 sec +rate 4.99 million edges/sec (incl time for U=triu(A)) +rate 5.77 million edges/sec (just tricount itself) +tricount (dot) memory usage: 0.00751169 GB +tricount time: 0.003868 sec (outer product method) +tri+prep time: 0.005241 sec (incl time to compute U) +compute C time: 0.003854 sec +reduce (C) time: 0.000014 sec +rate 19.06 million edges/sec (incl time for U=triu(A)) +rate 25.82 million edges/sec (just tricount itself) -tricount time: 0.015104 sec (dot product method) -tri+prep time: 0.021885 sec (incl time to compute L and U) -compute C time: 0.015093 sec -reduce (C) time: 0.000011 sec -rate 4.56 million edges/sec (incl time for U=triu(A)) -rate 6.61 million edges/sec (just tricount itself) - +tricount (outer) memory usage: 0.00636337 GB -------------------------------------------------------------- -random 100000 by 100000, nz: 19979838, method 0 time 4.453 sec +random 100000 by 100000, nz: 19979838, method 0 time 4.827 sec -total time to read A matrix: 4.713745 sec +total time to read A matrix: 5.023001 sec n 100000 # edges 9989919 -U=triu(A) time: 0.463484 sec -L=U' time : 0.437929 sec +U=triu(A) time: 0.127310 sec +read A, create U memory usage: 0.966905 GB +L=tril(A) time: 0.135162 sec # triangles 1328523 -tricount time: 2.445643 sec (outer product method) -tri+prep time: 2.909127 sec (incl time to compute U) -compute C time: 2.429495 sec -reduce (C) time: 0.016148 sec 
-rate 3.43 million edges/sec (incl time for U=triu(A)) -rate 4.08 million edges/sec (just tricount itself) - +tricount time: 26.325303 sec (dot product method) +tri+prep time: 26.587775 sec (incl time to compute L and U) +compute C time: 26.316294 sec +reduce (C) time: 0.009009 sec +rate 0.38 million edges/sec (incl time for U=triu(A)) +rate 0.38 million edges/sec (just tricount itself) -tricount time: 24.631576 sec (dot product method) -tri+prep time: 25.532989 sec (incl time to compute L and U) -compute C time: 24.622927 sec -reduce (C) time: 0.008649 sec -rate 0.39 million edges/sec (incl time for U=triu(A)) -rate 0.41 million edges/sec (just tricount itself) +tricount (dot) memory usage: 0.966905 GB +tricount time: 2.857520 sec (outer product method) +tri+prep time: 2.984830 sec (incl time to compute U) +compute C time: 2.845165 sec +reduce (C) time: 0.012355 sec +rate 3.35 million edges/sec (incl time for U=triu(A)) +rate 3.50 million edges/sec (just tricount itself) +tricount (outer) memory usage: 0.966905 GB -------------------------------------------------------------- -random 100000 by 100000, nz: 19979838, method 1 time 3.887 sec +random 100000 by 100000, nz: 19979838, method 1 time 4.229 sec -total time to read A matrix: 4.130590 sec +total time to read A matrix: 4.429307 sec n 100000 # edges 9989919 -U=triu(A) time: 0.455335 sec -L=U' time : 0.439813 sec +U=triu(A) time: 0.119248 sec +read A, create U memory usage: 0.561836 GB +L=tril(A) time: 0.132799 sec # triangles 1328523 -tricount time: 2.440143 sec (outer product method) -tri+prep time: 2.895478 sec (incl time to compute U) -compute C time: 2.424204 sec -reduce (C) time: 0.015939 sec -rate 3.45 million edges/sec (incl time for U=triu(A)) -rate 4.09 million edges/sec (just tricount itself) - - -tricount time: 25.179597 sec (dot product method) -tri+prep time: 26.074745 sec (incl time to compute L and U) -compute C time: 25.172452 sec -reduce (C) time: 0.007145 sec +tricount time: 26.175215 sec (dot product method) +tri+prep time: 26.427262 sec (incl time to compute L and U) +compute C time: 26.167814 sec +reduce (C) time: 0.007401 sec rate 0.38 million edges/sec (incl time for U=triu(A)) -rate 0.40 million edges/sec (just tricount itself) +rate 0.38 million edges/sec (just tricount itself) + +tricount (dot) memory usage: 0.722475 GB +tricount time: 2.713645 sec (outer product method) +tri+prep time: 2.832893 sec (incl time to compute U) +compute C time: 2.702922 sec +reduce (C) time: 0.010723 sec +rate 3.53 million edges/sec (incl time for U=triu(A)) +rate 3.68 million edges/sec (just tricount itself) +tricount (outer) memory usage: 0.603096 GB diff --git a/GraphBLAS/Demo/Program/bfs_demo.c b/GraphBLAS/Demo/Program/bfs_demo.c index 4030f3a169..4698ec1c9e 100644 --- a/GraphBLAS/Demo/Program/bfs_demo.c +++ b/GraphBLAS/Demo/Program/bfs_demo.c @@ -58,7 +58,7 @@ int main (int argc, char **argv) //-------------------------------------------------------------------------- // self edges are OK - OK (get_matrix (&A, argc, argv, false)) ; + OK (get_matrix (&A, argc, argv, false, true)) ; GrB_Index n ; OK (GrB_Matrix_nrows (&n, A)) ; @@ -147,6 +147,8 @@ int main (int argc, char **argv) fprintf (stderr, "nodes reached: %.16g of %.16g levels: %.16g " "time: %12.6f seconds\n", (double) nreachable, (double) n, (double) nlevels, t) ; + + OK (GrB_free (&v)) ; } // free all workspace, including A, v, and max_monoid if allocated diff --git a/GraphBLAS/Demo/Program/mis_demo.c b/GraphBLAS/Demo/Program/mis_demo.c index b30eabe6ec..06e29e0b4b 
100644 --- a/GraphBLAS/Demo/Program/mis_demo.c +++ b/GraphBLAS/Demo/Program/mis_demo.c @@ -67,7 +67,7 @@ int main (int argc, char **argv) // get a symmetric matrix with no self edges //-------------------------------------------------------------------------- - OK (get_matrix (&A, argc, argv, true)) ; + OK (get_matrix (&A, argc, argv, true, true)) ; GrB_Index n ; OK (GrB_Matrix_nrows (&n, A)) ; diff --git a/GraphBLAS/Demo/Program/tri_demo.c b/GraphBLAS/Demo/Program/tri_demo.c index daa07a6585..188b5cb051 100644 --- a/GraphBLAS/Demo/Program/tri_demo.c +++ b/GraphBLAS/Demo/Program/tri_demo.c @@ -48,7 +48,7 @@ int main (int argc, char **argv) { GrB_Matrix C = NULL, A = NULL, L = NULL, U = NULL ; GrB_Info info ; - double tic [2] ; + double tic [2], r1, r2 ; OK (GrB_init (GrB_NONBLOCKING)) ; fprintf (stderr, "tri_demo:\n") ; printf ("--------------------------------------------------------------\n"); @@ -57,12 +57,12 @@ int main (int argc, char **argv) // get a symmetric matrix with no self edges //-------------------------------------------------------------------------- - // get_matrix reads in a double-precision matrix. It could easily be - // changed to read in uint32 matrix instead, but this would affect the - // other GraphBLAS demos. So the time to typecast A = (uint32) C is added - // to the read time, not the prep time for triangle counting. + // get_matrix reads in a boolean matrix. It could easily be changed to + // read in uint32 matrix instead, but this would affect the other GraphBLAS + // demos. So the time to typecast A = (uint32) C is added to the read + // time, not the prep time for triangle counting. simple_tic (tic) ; - OK (get_matrix (&C, argc, argv, true)) ; + OK (get_matrix (&C, argc, argv, true, true)) ; GrB_Index n, nedges ; OK (GrB_Matrix_nrows (&n, C)) ; @@ -83,6 +83,19 @@ int main (int argc, char **argv) double t_prune = simple_toc (tic) ; printf ("U=triu(A) time: %14.6f sec\n", t_prune) ; + GxB_Statistics stats ; + + int64_t maxused1 ; + GxB_stats (&stats) ; + maxused1 = stats.maxused ; + + printf ("read A, create U memory usage: %g GB\n", + 1e-9 * (double) maxused1) ; + + //-------------------------------------------------------------------------- + // count the triangles via C = L'*U (dot-product) + //-------------------------------------------------------------------------- + // L = tril (A,-1), for method 4 simple_tic (tic) ; OK (GrB_Matrix_new (&L, GrB_UINT32, n, n)) ; @@ -90,6 +103,34 @@ int main (int argc, char **argv) OK (GxB_select (L, NULL, NULL, GxB_TRIL, A, &k, NULL)) ; double t_trans = simple_toc (tic) ; printf ("L=tril(A) time: %14.6f sec\n", t_trans) ; + OK (GrB_free (&A)) ; + + double t_dot [2] ; + int64_t ntri2 ; + OK (tricount (&ntri2, 5, NULL, NULL, L, U, t_dot)) ; + + printf ("# triangles %.16g\n", (double) ntri2) ; + + printf ("\ntricount time: %14.6f sec (dot product method)\n", + t_dot [0] + t_dot [1]) ; + printf ("tri+prep time: %14.6f sec (incl time to compute L and U)\n", + t_dot [0] + t_dot [1] + t_prune + t_trans) ; + + printf ("compute C time: %14.6f sec\n", t_dot [0]) ; + printf ("reduce (C) time: %14.6f sec\n", t_dot [1]) ; + + r1 = 1e-6*nedges / (t_dot [0] + t_dot [1] + t_prune + t_trans) ; + r2 = 1e-6*nedges / (t_dot [0] + t_dot [1]) ; + printf ("rate %10.2f million edges/sec (incl time for U=triu(A))\n", r1) ; + printf ("rate %10.2f million edges/sec (just tricount itself)\n\n", r2) ; + fprintf (stderr, + "rate %10.2f (with prep), %10.2f (just tricount)\n", r1, r2) ; + + int64_t maxused2 ; + GxB_stats (&stats) ; + maxused2 = MAX 
(maxused1, stats.maxused) ; + printf ("tricount (dot) memory usage: %g GB\n", + 1e-9 * (double) maxused2) ; //-------------------------------------------------------------------------- // count the triangles via C = U*U (outer-product) @@ -97,9 +138,7 @@ int main (int argc, char **argv) double t_mark [2] = { 0, 0 } ; int64_t ntri1 ; - OK (tricount (&ntri1, 3, A, NULL, NULL, U, t_mark)) ; - - printf ("# triangles %.16g\n\n", (double) ntri1) ; + OK (tricount (&ntri1, 3, NULL, NULL, NULL, U, t_mark)) ; printf ("tricount time: %14.6f sec (outer product method)\n", t_mark [0] + t_mark [1]) ; @@ -109,35 +148,18 @@ int main (int argc, char **argv) printf ("compute C time: %14.6f sec\n", t_mark [0]) ; printf ("reduce (C) time: %14.6f sec\n", t_mark [1]) ; - double r1 = 1e-6*((double)nedges) / (t_mark [0] + t_mark [1] + t_prune) ; - double r2 = 1e-6*((double)nedges) / (t_mark [0] + t_mark [1]) ; + r1 = 1e-6*((double)nedges) / (t_mark [0] + t_mark [1] + t_prune) ; + r2 = 1e-6*((double)nedges) / (t_mark [0] + t_mark [1]) ; printf ("rate %10.2f million edges/sec (incl time for U=triu(A))\n", r1) ; printf ("rate %10.2f million edges/sec (just tricount itself)\n\n", r2) ; fprintf (stderr, "rate %10.2f (with prep), %10.2f (just tricount)\n", r1, r2) ; - //-------------------------------------------------------------------------- - // count the triangles via C = L'*U (dot-produt) - //-------------------------------------------------------------------------- - - double t_dot [2] ; - int64_t ntri2 ; - OK (tricount (&ntri2, 5, A, NULL, L, U, t_dot)) ; - - printf ("\ntricount time: %14.6f sec (dot product method)\n", - t_dot [0] + t_dot [1]) ; - printf ("tri+prep time: %14.6f sec (incl time to compute L and U)\n", - t_dot [0] + t_dot [1] + t_prune + t_trans) ; - - printf ("compute C time: %14.6f sec\n", t_dot [0]) ; - printf ("reduce (C) time: %14.6f sec\n", t_dot [1]) ; - - r1 = 1e-6*nedges / (t_dot [0] + t_dot [1] + t_prune + t_trans) ; - r2 = 1e-6*nedges / (t_dot [0] + t_dot [1]) ; - printf ("rate %10.2f million edges/sec (incl time for U=triu(A))\n", r1) ; - printf ("rate %10.2f million edges/sec (just tricount itself)\n\n", r2) ; - fprintf (stderr, - "rate %10.2f (with prep), %10.2f (just tricount)\n", r1, r2) ; + int64_t maxused3 ; + GxB_stats (&stats) ; + maxused3 = MAX (maxused1, stats.maxused) ; + printf ("tricount (outer) memory usage: %g GB\n", + 1e-9 * (double) maxused3) ; //-------------------------------------------------------------------------- // check result and free workspace diff --git a/GraphBLAS/Demo/Program/wildtype_demo.c b/GraphBLAS/Demo/Program/wildtype_demo.c index cde34f03a8..966911c6ce 100644 --- a/GraphBLAS/Demo/Program/wildtype_demo.c +++ b/GraphBLAS/Demo/Program/wildtype_demo.c @@ -225,6 +225,7 @@ int main (void) GrB_free (&C) ; GrB_free (&A) ; GrB_free (&B) ; + GrB_free (&D) ; GrB_free (&InTheWild) ; GrB_free (&WildAdder) ; GrB_free (&WildAdd) ; diff --git a/GraphBLAS/Demo/README.txt b/GraphBLAS/Demo/README.txt index a22e6665cc..de9f264b8b 100644 --- a/GraphBLAS/Demo/README.txt +++ b/GraphBLAS/Demo/README.txt @@ -51,12 +51,13 @@ wildtype_demo.c demo program with arbitrary struct as user-defined type in Demo/Output: -------------------------------------------------------------------------------- -bfs_demo.out_ok output of bfs_demo -complex_demo_out_ok.m output of complex_demo, run in MATLAB to check results -mis_demo.out_ok output of mis_demo -simple_test.out_ok output of simple_demo -tri_demo.out_ok output of tri_demo -wildtype_demo.out_ok output of wildtype_demo +bfs_demo.out 
output of bfs_demo +complex_demo_out.m output of complex_demo, run in MATLAB to check results +go_out_laptop.txt output of go.m on a MacBook Pro +mis_demo.out output of mis_demo +simple_test.out output of simple_demo +tri_demo.out output of tri_demo +wildtype_demo.out output of wildtype_demo -------------------------------------------------------------------------------- in Demo/Include: @@ -75,6 +76,8 @@ tricount.m five triangle counting methods using MATLAB adj_to_edges.m convert adjacency matrix to incidence matrix edges_to_adj.m convert incidence matrix to adjacency matrix check_adj.m check an adjaceny matrix +tri_matlab.m run a set of GraphChallenge matrices +tri_matlab_out.txt output of tri_matlab.m -------------------------------------------------------------------------------- in Demo/Matrix: diff --git a/GraphBLAS/Demo/Source/get_matrix.c b/GraphBLAS/Demo/Source/get_matrix.c index 854b249ed1..0ea2745d68 100644 --- a/GraphBLAS/Demo/Source/get_matrix.c +++ b/GraphBLAS/Demo/Source/get_matrix.c @@ -22,7 +22,8 @@ GrB_Info get_matrix // get a matrix from stdin, or create random one GrB_Matrix *A_output, // matrix to create int argc, // command-line arguments char **argv, - bool no_self_edges // if true, ensure the matrix has no self-edges + bool no_self_edges, // if true, ensure the matrix has no self-edges + bool boolean // if true, file is read as GrB_BOOL, else GrB_FP64 ) { @@ -133,6 +134,7 @@ GrB_Info get_matrix // get a matrix from stdin, or create random one OK (GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE)) ; OK (GrB_transpose (A, Mask, NULL, A, desc)) ; GrB_free (&Mask) ; + GrB_free (&desc) ; t = simple_toc (tic) ; } } @@ -154,7 +156,7 @@ GrB_Info get_matrix // get a matrix from stdin, or create random one bool one_based = false ; if (argc > 2) one_based = strtol (argv [1], NULL, 0) ; - OK (read_matrix (&A, stdin, true, no_self_edges, one_based)) ; + OK (read_matrix (&A, stdin, true, no_self_edges, one_based, boolean)) ; OK (GrB_Matrix_nrows (&nrows, A)) ; OK (GrB_Matrix_ncols (&ncols, A)) ; diff --git a/GraphBLAS/Demo/Source/read_matrix.c b/GraphBLAS/Demo/Source/read_matrix.c index 348cc944dc..80ee76ed76 100644 --- a/GraphBLAS/Demo/Source/read_matrix.c +++ b/GraphBLAS/Demo/Source/read_matrix.c @@ -45,13 +45,14 @@ void scale2 (double *z, const double *x) // read a matrix from a file //------------------------------------------------------------------------------ -GrB_Info read_matrix // read a double-precision matrix +GrB_Info read_matrix // read a double-precision or boolean matrix ( GrB_Matrix *A_output, // handle of matrix to create FILE *f, // file to read the tuples from bool make_symmetric, // if true, return A as symmetric bool no_self_edges, // if true, then remove self edges from A - bool one_based // if true, input matrix is 1-based + bool one_based, // if true, input matrix is 1-based + bool boolean // if true, input is GrB_BOOL, otherwise GrB_FP64 ) { @@ -72,9 +73,13 @@ GrB_Info read_matrix // read a double-precision matrix // allocate initial space for tuples //-------------------------------------------------------------------------- + size_t xsize = ((boolean) ? 
sizeof (bool) : sizeof (double)) ; GrB_Index *I = malloc (len * sizeof (int64_t)), *I2 = NULL ; GrB_Index *J = malloc (len * sizeof (int64_t)), *J2 = NULL ; - double *X = malloc (len * sizeof (double )), *X2 = NULL ; + void *X = malloc (len * xsize) ; + bool *Xbool ; + double *Xdouble ; + void *X2 = NULL ; if (I == NULL || J == NULL || X == NULL) { // out of memory @@ -83,6 +88,9 @@ GrB_Info read_matrix // read a double-precision matrix return (GrB_OUT_OF_MEMORY) ; } + Xbool = (bool *) X ; + Xdouble = (double *) X ; + //-------------------------------------------------------------------------- // read in the tuples from stdin, one per line //-------------------------------------------------------------------------- @@ -97,7 +105,7 @@ GrB_Info read_matrix // read a double-precision matrix { I2 = realloc (I, 2 * len * sizeof (int64_t)) ; J2 = realloc (J, 2 * len * sizeof (int64_t)) ; - X2 = realloc (X, 2 * len * sizeof (double )) ; + X2 = realloc (X, 2 * len * xsize) ; if (I2 == NULL || J2 == NULL || X2 == NULL) { printf ("out of memory for tuples\n") ; @@ -108,6 +116,8 @@ GrB_Info read_matrix // read a double-precision matrix J = J2 ; J2 = NULL ; X = X2 ; X2 = NULL ; len = len * 2 ; + Xbool = (bool *) X ; + Xdouble = (double *) X ; } if (one_based) { @@ -116,7 +126,14 @@ GrB_Info read_matrix // read a double-precision matrix } I [ntuples] = i ; J [ntuples] = j ; - X [ntuples] = x ; + if (boolean) + { + Xbool [ntuples] = (x != 0) ; + } + else + { + Xdouble [ntuples] = x ; + } ntuples++ ; } @@ -154,7 +171,14 @@ GrB_Info read_matrix // read a double-precision matrix // keep this off-diagonal edge I [ntuples2] = I [k] ; J [ntuples2] = J [k] ; - X [ntuples2] = X [k] ; + if (boolean) + { + Xbool [ntuples2] = Xbool [k] ; + } + else + { + Xdouble [ntuples2] = Xdouble [k] ; + } ntuples2++ ; } } @@ -167,10 +191,32 @@ GrB_Info read_matrix // read a double-precision matrix // build the matrix, summing up duplicates, and then free the tuples //-------------------------------------------------------------------------- + GrB_Type xtype ; + GrB_BinaryOp xop, xop2 ; + if (boolean) + { + xtype = GrB_BOOL ; + xop = GrB_LOR ; + xop2 = GrB_FIRST_BOOL ; + } + else + { + xtype = xtype ; + xop = GrB_PLUS_FP64 ; + xop2 = GrB_FIRST_FP64 ; + } + simple_tic (tic) ; GrB_Info info ; - OK (GrB_Matrix_new (&C, GrB_FP64, nrows, ncols)) ; - OK (GrB_Matrix_build (C, I, J, X, ntuples, GrB_PLUS_FP64)) ; + OK (GrB_Matrix_new (&C, xtype, nrows, ncols)) ; + if (boolean) + { + OK (GrB_Matrix_build (C, I, J, Xbool, ntuples, xop)) ; + } + else + { + OK (GrB_Matrix_build (C, I, J, Xdouble, ntuples, xop)) ; + } t1 = simple_toc (tic) ; printf ("time to build the graph with GrB_Matrix_build: %12.6f\n", t1) ; @@ -186,7 +232,7 @@ GrB_Info read_matrix // read a double-precision matrix // rebuilt every time a single entry is added. 
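// A minimal sketch of the tuple-assembly selection performed above, reusing
// the variables from this function (I, J, Xbool, Xdouble, ntuples, nrows,
// ncols, boolean).  GrB_FP64 in the else branch is an assumption: the hunk
// above assigns xtype to itself, but GrB_PLUS_FP64 implies double-precision
// entries.

GrB_Type xtype ;
GrB_BinaryOp xop ;                  // operator that combines duplicate tuples
if (boolean)
{
    xtype = GrB_BOOL ;              // entries read as boolean
    xop = GrB_LOR ;                 // duplicates are OR'd together
}
else
{
    xtype = GrB_FP64 ;              // assumption: double-precision entries
    xop = GrB_PLUS_FP64 ;           // duplicates are summed
}
OK (GrB_Matrix_new (&C, xtype, nrows, ncols)) ;
if (boolean)
{
    OK (GrB_Matrix_build (C, I, J, Xbool, ntuples, xop)) ;
}
else
{
    OK (GrB_Matrix_build (C, I, J, Xdouble, ntuples, xop)) ;
}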
simple_tic (tic) ; - OK (GrB_Matrix_new (&B, GrB_FP64, nrows, ncols)) ; + OK (GrB_Matrix_new (&B, xtype, nrows, ncols)) ; for (int64_t k = 0 ; k < ntuples ; k++) { // B (I[k], J[k]) = X [k] @@ -238,16 +284,26 @@ GrB_Info read_matrix // read a double-precision matrix printf ("A = (C+C')/2\n") ; double tic [2], t ; simple_tic (tic) ; - OK (GrB_Matrix_new (&A, GrB_FP64, nrows, nrows)) ; - OK (GrB_eWiseAdd (A, NULL, NULL, GrB_PLUS_FP64, C, C, dt2)) ; + OK (GrB_Matrix_new (&A, xtype, nrows, nrows)) ; + OK (GrB_eWiseAdd (A, NULL, NULL, xop, C, C, dt2)) ; OK (GrB_free (&C)) ; - OK (GrB_Matrix_new (&C, GrB_FP64, nrows, nrows)) ; - OK (GrB_UnaryOp_new (&scale2_op, scale2, GrB_FP64, GrB_FP64)) ; - OK (GrB_apply (C, NULL, NULL, scale2_op, A, NULL)) ; - OK (GrB_free (&A)) ; - OK (GrB_free (&scale2_op)) ; - *A_output = C ; - C = NULL ; + + if (boolean) + { + *A_output = A ; + A = NULL ; + } + else + { + OK (GrB_Matrix_new (&C, xtype, nrows, nrows)) ; + OK (GrB_UnaryOp_new (&scale2_op, scale2, xtype, xtype)) ; + OK (GrB_apply (C, NULL, NULL, scale2_op, A, NULL)) ; + OK (GrB_free (&A)) ; + OK (GrB_free (&scale2_op)) ; + *A_output = C ; + C = NULL ; + } + t = simple_toc (tic) ; printf ("A = (C+C')/2 time %12.6f\n", t) ; @@ -266,7 +322,7 @@ GrB_Info read_matrix // read a double-precision matrix simple_tic (tic) ; int64_t n = nrows + ncols ; - OK (GrB_Matrix_new (&A, GrB_FP64, n, n)) ; + OK (GrB_Matrix_new (&A, xtype, n, n)) ; I = malloc (nrows * sizeof (int64_t)) ; J = malloc (ncols * sizeof (int64_t)) ; @@ -294,11 +350,11 @@ GrB_Info read_matrix // read a double-precision matrix } // A (nrows:n-1, 0:nrows-1) += C' - OK (GrB_assign (A, NULL, GrB_FIRST_FP64, // or NULL, + OK (GrB_assign (A, NULL, xop2, // or NULL, C, J, ncols, I, nrows, dt1)) ; // A (0:nrows-1, nrows:n-1) += C - OK (GrB_assign (A, NULL, GrB_FIRST_FP64, // or NULL, + OK (GrB_assign (A, NULL, xop2, // or NULL, C, I, nrows, J, ncols, NULL)) ; // force completion; if this statement does not appear, the diff --git a/GraphBLAS/Demo/demo b/GraphBLAS/Demo/demo index 7ce535ca5a..cf2d442c61 100755 --- a/GraphBLAS/Demo/demo +++ b/GraphBLAS/Demo/demo @@ -1,3 +1,4 @@ +#!/bin/sh ../build/simple_demo > simple_demo.out ../build/complex_demo > complex_demo_out.m ../build/wildtype_demo > wildtype_demo.out @@ -70,5 +71,6 @@ diff -I time Output/complex_demo_out.m complex_demo_out.m diff -I time Output/wildtype_demo.out wildtype_demo.out diff -I time Output/bfs_demo.out bfs_demo.out diff -I time Output/mis_demo.out mis_demo.out -diff -I time -I rate Output/tri_demo.out tri_demo.out +diff -I time -I rate -I usage Output/tri_demo.out tri_demo.out +exit 0 diff --git a/GraphBLAS/Demo/go3 b/GraphBLAS/Demo/go3 new file mode 100755 index 0000000000..64d773a5d9 --- /dev/null +++ b/GraphBLAS/Demo/go3 @@ -0,0 +1,7 @@ +#!/bin/csh +./tri_run \ + /research/davisgroup/GraphChallenge/snap/roadNet-CA/roadNet-CA_adj.tsv.gz \ + /research/davisgroup/GraphChallenge/snap/roadNet-PA/roadNet-PA_adj.tsv.gz \ + /research/davisgroup/GraphChallenge/snap/roadNet-TX/roadNet-TX_adj.tsv.gz \ + /research/davisgroup/GraphChallenge/ssget/DIMACS10/hugebubbles-00020_adj.tsv.gz \ + /research/davisgroup/GraphChallenge/ssget/Freescale/circuit5M_adj.tsv.gz diff --git a/GraphBLAS/Demo/t1 b/GraphBLAS/Demo/t1 deleted file mode 100755 index 7857b67cb2..0000000000 --- a/GraphBLAS/Demo/t1 +++ /dev/null @@ -1,3 +0,0 @@ - -../build/tri_demo < Matrix/bcsstk01 - diff --git a/GraphBLAS/Demo/tdemo b/GraphBLAS/Demo/tdemo deleted file mode 100755 index db37ddfca3..0000000000 --- a/GraphBLAS/Demo/tdemo +++ 
/dev/null @@ -1,24 +0,0 @@ - -../build/tri_demo 1 4 4 > tri_demo.out -../build/tri_demo 0 5 5 30 1 >> tri_demo.out -../build/tri_demo < Matrix/eye3 >> tri_demo.out -../build/tri_demo < Matrix/2blocks >> tri_demo.out -../build/tri_demo < Matrix/t1 >> tri_demo.out -../build/tri_demo < Matrix/t2 >> tri_demo.out -../build/tri_demo < Matrix/ash219 >> tri_demo.out -../build/tri_demo < Matrix/bcsstk01 >> tri_demo.out -../build/tri_demo < Matrix/bcsstk16 >> tri_demo.out -../build/tri_demo < Matrix/fs_183_1 >> tri_demo.out -../build/tri_demo < Matrix/ibm32a >> tri_demo.out -../build/tri_demo < Matrix/ibm32b >> tri_demo.out -../build/tri_demo < Matrix/lp_afiro >> tri_demo.out -../build/tri_demo < Matrix/mbeacxc >> tri_demo.out -../build/tri_demo < Matrix/west0067 >> tri_demo.out -../build/tri_demo 1 200 200 0 >> tri_demo.out -../build/tri_demo 0 10000 10000 100000 0 >> tri_demo.out -../build/tri_demo 0 10000 10000 100000 1 >> tri_demo.out -../build/tri_demo 0 100000 100000 10000000 0 >> tri_demo.out -../build/tri_demo 0 100000 100000 10000000 1 >> tri_demo.out - -diff -I time -I rate Output/tri_demo.out tri_demo.out - diff --git a/GraphBLAS/Doc/ChangeLog b/GraphBLAS/Doc/ChangeLog index e298e978f8..ab89f23d1e 100644 --- a/GraphBLAS/Doc/ChangeLog +++ b/GraphBLAS/Doc/ChangeLog @@ -1,3 +1,19 @@ +Version 1.1.2: Dec 28, 2017 + + * build issues with cmake + * performance improvement of dot product method, C=A'*B and v=u'*A, + particularly when A, B and u have dense columns, or nearly dense + columns. + +Version 1.1.1: Dec 17, 2017 + + * bug fix to GrB_assign and GxB_subassign: result was incorrect when + output C was the same as the input A or Mask. + * performance improvement in C=A*B and GrB_reduce to scalar + * split AxB built-in semirings into Source/Generated/* for + faster compilation + * added memory usage statistics + Version 1.1.0: Dec 1, 2017 * add new feature: GxB_SelectOp operator and GxB_select operation @@ -10,8 +26,8 @@ Version 1.1.0: Dec 1, 2017 Version 1.0.0: Nov 25, 2017 * transition to CMake - * tested on many compilers (gcc 4.9 to 7.2 on Linux, clang 8 and gcc 6.2.0 on - the Mac, xlc 13.01 on IBM Power) + * tested on many compilers (gcc 4.9 to 7.2 on Linux, clang 8 and gcc 6.2.0 + on the Mac, xlc 13.01 on IBM Power) * changed printf formats in Demo/ to silence warnings from older compilers * complex.h in gcc-6.2.0 on the Mac doesn't #define CMPLX; added to usercomplex.h * mergesort removed, quicksort performance improved diff --git a/GraphBLAS/Doc/GraphBLAS_UserGuide.pdf b/GraphBLAS/Doc/GraphBLAS_UserGuide.pdf index 66414faf750db4a65739800a3a17ac4e09629ec9..90b2c01dd393e40dac145cdf1ebb4870d75cfd63 100644 GIT binary patch delta 359279 zcmZsCV|1V~yKSwht*LF>w#}(Awf)xZ)W+1-)V9s3ZQI73@0@$@pL720?3HA#BaV^lcA|NPKZ@r>%}87Th78ZiJJn~AP{II*||)}X!O)< zzYD?~12@ojS|uprNzjz&*b$X&Q2VN|(b0$n;hD+v@|Bbl2GAf-0*ujzU`U-XM_A>U zj1|#y@zpRLwR#OBk!XP# zp#Oki6q$kO;1U89wjHNPek>%Il7Jy|8wG5zz<8h<8*@39V$~cR%EPxRcZ4xbx6aw8rTq4e} zrd~M;!<5JbM=lqFg1Fj06M_$vdneRilto$#1N*eXUv>=841iBk3WV$r&Ju!xE2?uz zvt&hCN1u%x-5~Wub;2`ZrBl^$oDQ%7%l5ZnQxxLxfm0||syieK`@=e*LVN~)q*mw= zg2%86JHqVlFwtq%k}Ryy(r=U_1Ftv}06H8Pz_YF8pyW-soh#v(sF-v78dKGsv?bg2 znI$mqc6bZaJ!|XzJ(^BLGW@2Qys}!Y|6rH0Hpyv~HGSM%m$s5zv9;Xg$CbGZ{j5?{ zG5mX$q`)g#qp`^BwA{x$Z0gE*Q()zkx_Me{YP8<&^Z6krWsyH+1yi@mCiymxIDZwj zTTn;FqW;0DQ%YakGt_&(;}n2~@seErb52*OZ}1LabNy(7{-n#Ao*CBJZ~Uq0+!Xj- z_WS4wQZJN+VW#Bf%GV-hRpz7wd&>Iy-sisfW${3@XWNLOZQ=}wL=)3AwaIhxp^~#% zBzznGf>>1{ci((0xZc99AN}D$k~K_om6e8h3eDM~x)H0_a@3T1b6QjRn6-@Hp8_*& 
z3>vc5Gj*{DBO+=E}nC-wAPxFqzy z6PDhQ_eHt8{EPJMXN$nUfPV9o{tK$WaMG87LGbhcDNX%~k3E^fdq||4#)i?j}O4SLVjQICvSLlleHO- z*!f^EI3G+-4S8M)9>*?D=NJC#mgGO4e}H7w$@7OJF$U%Y-=6ArM;DtI@aiq7C(*4~ z3s~FM)qzJqhXakm_z$wFcI$jBv}K_o$|>rzqQoFs8m0YB(P*p^E|E$&_;S7q5`UKs(^YWy zU|px;@QOF@@sz6m1r+f-*w!!ybMyRjWTUwd)yj!TnLb2|{ic09=3+e;qsxwZbT0&c z5`zyI=Ce$+h@&0`{+=JIxziEBBim7JtNkXgL2QYqJCC;wv+6BQTYM3?jD|RY6#KRS zPwc3{Qx8n(qeU_fND8ESxqrz&N7A6f5a`58L{d)RI6oAGa$tf!=u{tcur|6%hOaqX z|AD-yhQK=X{*cvh9v}GfMt&u@bwvD@PC$1wke#uoC5)7 zhO#ALfv}GQ0?_-CCsUY!C4?~;!08o=0}k@QbAq4~xs4CEy17K{KuJVPI&YVECwD;z z92>elzt4jnG+<0uG6M(a=|pM_w>^$bSXM0$Ktzn&Q=9XgjW%rT^M`I;{(QR2m`;y` z>oOZ^3fBS|JzTM)(tme#J#X6%V9E_$xb+v_$pC|_uCp#V;{FJgn1o9q8-9OYC@z0g%#4$XIn@|KV>};?J{{NhBP68Fg+zxIp}xHqSQ9gi^>Lu=N-Iz+SH)vQssQ z>~k|hK7FDRtUV{XgmIj@Gtp-yR|e28IhS1Uxho149;2YAUVjJ@KOMyPBriyM=>uEG zLIa476DN6SI*B&SZTIN^)oJfDW0~h0rtFd?qAoa5+htbXox%)moyrU=TihOD$K*WJ zVb=LrF~!9u)O;Ni;C{wO4JJX-W{rj0rPq!VyzC+th*9B3UMyO_S@lFrGvs>NfdJrz zmt|7k`6$k|Tz~w!&eNQIavf00#NpQ>n{^yz!RI@V_*7rtvY^kE#cn8zmlj~_s$`x} z$w2X6d|h}rhNUKNSuhF(ckV*CI7W4LnIsZcx$$jeXaF7S@K|$oT5PCZf)xtz81sq@ zceCkUMp`J@dpusGmtwxGNQ}_34{e__&Y5X4^9&~tJ%3H#P*@6@j5Kg$^Xp=)*vTu08A5f%G-g9bV71$d> z(~W9PK!Uz{o+~ZKC8%JnAw15s21{xwqEz!2Mo6KUE*{y?AkhGw57$U^@$Ks3*9$aS z6u_xYmn;?mbAPM}WC}p{qK;7zCn9o*S8{sl_(%mhsyGIn)0fVKik8e0O%9dtFJ})O zz@R$ga^!a+|;4mH}shlCy` z(M$Utj_YagZm7qV-?W2s-?n_fUvL5l?-Fp(PLL7j*Lm7OQB>`WYI5wbS-q}twe_{a zj0W7BAmv|2ar7kV3%GskN`y9vM;7YRlu_aKR067cAjgGl-TJMcPhEe9C9RR+zW9Zn z1m>S$A%B=mUyHHqcmb#{Njg`K0FKDMVk*1@{(XnWANX&N%|Us0%IS~tyoK6@-1T28t)7!yCICxAvjx>~Guw1ZVbdNco7tii zpj3Ah)HFaTV6fHDsc#;zPRI^9Z)LNLptMc3et&BHqHMLgZJ$#kRYJLR^cem|I992M zb^0=(C}@MFc{3hMtAu_fk0nkx6aOAO9t6dW?f?)C?)i5BG8|)G)>kJ`P#OIOP&R2w zA^FFRI6~oZj_zPsYJ_zU!#6*C`{DhZBS4x+`p4PTd|9cCh=vE0aR|*hHdLXmuR;N5 zx-x1J3Riy2blmO0+&7rrHdUCad&pC9_hOW<=Uo)a7V_hzflfm*LzRw5!Ha(bs}u9t zml19O69F=pVaWj$0yr_3A%p@dmmwJeO93R8Z5aVLe}HTwndBr;sW@2g&OUzTK?a9! 
z1J0tv2Hza(E{K>G5_V;9efAp}H&rK$W=6zQ<5mkR!@K=rDQ%eF*DpCJ5kE%1{&~G-Oq( zpvi;H6SfgHwKdF|mLxnhgJfS2ojdW035`y1gC`#VsKvY_)s}t z-c*}pCo5BGIL@C|f-r)vxFA0ex{}b2T(Isbzl>Ts} ze1$(c5kET_OA~2+(7a8ntn}yg=27Let49{2+T}#=(I`QDzf7=xeyJx_hqve&0Y3!{ zGvX7ri`Ch`HI)*Zml19O69F@q0UHAp12s1|lTo-Smxmw$4S$t)kpcP;w@Kh4>vZ8s zfkj&cvPX`UYNXkboH+gZJ%^+&oT0~dlLfjD#)#C>;kn;vl6*)=^7@4M-_zB}cds~2 zsK~fbELmMAl#)zKktiuLCh}ypPJT_{aj~S7$@KNE*xsDJd2z8=Dk9R`rr3SKN13+U z#gfys+!cMh>wlgveqa6c-7970xS7f`Mg<&gvA*7&BGk#t)yaQODD0AiCW5ex=iHtz zgi5Y9C%^tqk~O^iDIpm*I{9otH;M4b>g3|&UyhgXJV8_@bsqBat|=BQPj_G5ZXJyw z|B_}pX9J1P9Ec==h~Bi{y4`IimL^)5l9{GDBIyYR9w`$MNzXt) z`!XLuu76`#@{EXkBs>9$Ov;#qCkT}(8Av!=EOYRUmE$x|*X?FO)1qqdYcK&~K($~2 z&eq4%d$iCmUc^>YOU|s|Vr1zo&*f1e8L;V*@}gNsh0U^D(WC4^XpnN0byJpWM_1GK z_>iZUj$*fK)r-4bflQZ~<2eUji3oWH`Ro9gX@9UD_W<@&-MO7Vs%G7}N3D#w&VvI47DWdn&g9(Fx zHlrV%ov-x0c=8AE7%@8AyZaDv0&Y9uu{MxpSqveA)HY?`S3h(17|JZhxX2iRjAAB) zN<1twYqvUmg|3I0gF|>M?`(wF)@4)>IV3J6XH~=d7iZ_cya$QIInV|~lFyBgLw~7K z_>h}}Y3e zXXL)!{n7a&{n4kbJ-V&S!taE!%d^HfgQ7ITT1-D2D#TGXukReK?_*KfiZMSPl%b;2 zqyG;zS?4``5}eN@$QP#s(goQ30CnxtO~e4gB%v9EujBMUS*wH4MIoYZFn>i>J<9sv zpsS+meNJY8Uker{xvKeqSkPm!;HY+PWf{Onnf9&Q3U!4$F5v>muIz7jjYoyc4idM6 zvD_n;hqz~&Lsj}P_d%GYkZ>!Y+#XT56URcrEvb9yGZ;w0fCJOLVyfTW+6u-O5%@sd zkBc_=P}E&%BQbSeg_~jR6n{^V8Y&@rV2}4DOOOH=`k@%aGRFC%{1F{zLbvmDPeq-# z4bD}T??reB7nWa{{s1NqpX{uLnSqM4uH6Gfcf@m9H5gF9f||~zKU<^N)67tx8th(d zQP;VZ%H8iJd^XiDurZ_-lSc$RR%x@^AODoApnc_wHMBK7wwIo|seg$Btpw#4^c8B> z(2!{8g+`VUI&XWZ?=-^5g~vQ|bljsv4R$!S#K)8!?rK+UZ9;O>1u-WDnjfXG;j;n>(r!R=o@uW4n%WQ( zWU)z%*AWyFPB`pn10;}LrpECuibEN(KQV}_iW{iXps#zXs-WdOR27F34!n2@GjGpd zzK<{cmzW?S>*sWY{6|cp(ErX+NuH~yZh!0$E1#4DKB6%)3n?&Z z`h*bhwZwEN2@e%-%c37&p0CbU|Bi772boFyKoZqjM7aa8$*CjzrrbSbdQr0hy-(Gj zi_h9M-5OjgA|ECoRWNZVA6x0iG@oZAU|2&2R z>JIi7a&d4Bq}gdat0Je6Jsp!-oicXJw%8du=KGU5lYe%m{B4ps1rd^buQXgjeBtS3 z+tx9WQk7|@_7u2>J!6xKCp@9Wh&tk6JHDnpubbh zB)c-Xet&>8cbAE1)Nyt>P%p(}WGIZuD17<#GCa72osVij08}On8V0aWYd9A_zx?5O ztoZ`_BGa%TESLALij2kjgt0_y_6QUOGpVVhc=BlwhUGNAzJEesrh@4kOu^e63~K)i%7g}3OfTQu!%Q`B}E24H=;+u(G1$Mi#bldK(3V?CavilO9A9E142nGs2p8fJytqhJQ z5cgEba5zc{RXF^HA=yVyob;RHq3+4ilNuH zQZ#!3$KgJRH=e4%WFHf7*qjZH_ak@qcHV#kjst(#_w1GKT|a6A7%xihYrf$j%1e<6weRJ9KFfQ5%Ium*my(tKFUgWW zAz2b)l8%x!e>n2%I)6g0?5-a<@mH@FQ>exgl9+#wY63)9f4H?3;%$_S6!1BfpHN085B4s;8;6naK7Zv z(ayObOpUYy%D?ah7sQbxl!+slPn1nMJdTo%b3W-Xf5fBtF?g~`p#wv4p%nn8sWhtdXnnyn zSEVS0adF->eq`3MKqWcRaHM}cey5%j41$BtfS^jdX9p+0m+NXaS6m(OiMbY(#@E+w zwXG@~e`oGtH{rmS*SfI9WoKM*;jr5Ebp^%+c-A6l#3@N35~sk4a83&8FL`@J>8iAIfD@R3#`hDeo2Ts-kT2R#7!lIMCAyn9{SX z%6-#Eeu1smzFEpN_Oa=fv8&prV?OSaK!LuXf55f*Fulj}U9)x{Sn>0zB-!?5H~kU- z$MtI!;8{W#l=YlzVmLd)Us*EzDh0aBWC(2g*Nr<<7GhIOX7fbK zc!AjoKIl5#jK#JV>~gVpzA`3Bng|$3=wfJw7aUZbrEU+B50J3-?=J=Mga(&vM@5P7{O-2bzC%i93)tQ$O@ahM6=zjcC+xB z6{X*oxdoRtI9qlZ&lk?(`La*TB4Nd5e-LNd4hF-Xwm>dy*3niVDN3cGrM3`+Opvf9 z@g3S%4JKL!yPt|>0t8Zbp&YQ~Q7oZ!Xh%dLn^Ur&zteux;}d1ot?{iFZ|ZD!^YZO) z?>iqn3peP4owGE&|3(jKxS~U4b7l-QOddw;e#$QBiD{yTKF)>awhkZz)8e7^f2932 z=JAAB-g9GXIY@PuYj(_IU~!1CV~tE|XxaLbN=nDKf{_^DVp5E*1)-FLP?;QG3_>|b z3&*#dP>cp*z=u}@IDASntlE9| z{DND?QO$4z*MQosjRTHr;W>dbf6r49J7I9kD7_{4^|IQc?5{h~fs}vaQvn@g>&|>hm=@v0Wh7Mq~mEbO|u(Yf5 z+oC$`HxF(xV9niLpc(D{Y`-tcZ=Htil(B$elh8~=f3}~}4&io_bfxMre-4-l;JY~n zxhc%iq_6bw+=Pl0-pBjXU4J`lJ%&RHQ+>i8EVaH{KgAzFo8F*e?}TUWaoi2O=HVt= zpT5#HTuIp|5X0<2o9KMDZ+8W>Zrf6PZ-EWfW>eX+bUB5oeen&-_fY3ACT~U~p51aJ zj<~JIj1qz2*}CUU_&(s9e~-^S*Y26yVD)~)wZ_q$3nH|@BX6$|R`uUEIks}+UBm&z z01$cznPE}FwLPW$EZE^N;X&R&@G{$M@|AU=Q>TOCnQW>|2qOC*7LS(|p6aNQXmb`_ zM`CIp5j?_{)ET5AES@wIcn`3N?w}1WFg`^LQq=#<8J#Bq#G%zYe=cl;LMBL7ljI+(bHVAmPoz%4W2ouYyuJdK9aEQCl-Endz 
zpE^vEDzL{Q{Y0#kAyOrmxs4+FGD}J>^DV%gP7nqYP6*@{6FFx-Qs8TGi?PZ9*bQ^r zB+zSnKB&LG;UKs@f4HeT^fcSMmn8_c7m<|*U%-fgXb zxp2b?b`Fi#UWeSvE?ed#z|9Cbrew&<1&8EX)q)3mUM;Tv1OZu2HkV-p1QVB^OaT=H zF*PwZlTo-Sf0*BI+cpr#-~CtUX|RilA&ekjj}YqfY+% z9Z6A^Y}rv}1z4XXQS{yMc=zQ;rpdR2CT~Wxy1tx_E?#q%Fh;DDkW6QIONo&pQBn{P zI+$kH4n4p;9ZtfS<|$O_U*2n<@qSX4^JFVkU$jl)@>Sg5y-m z>%2x|46m%yXd_VfA{tPdC~cEfp3EMIEflcSl2{O@V}paZ4h>2HL`gY70*_rot+B)s zt3df}#jxwf-s6S6j|`}g-YtjfnpA!df5nLTHAT%eg8U1ji5)IANX8u%G*tO!vEE> zSXb|zw`!STVB;Q^u2mFvNp$E(E9*u&%V)HSH#}y+ zjV&uh@~Ob>5x~bk=8LNdnDIPJS*)uRp{`lfvy?fFta22j3DmfyZ*E8*ktS)0;caEw z4S` zV?gpaY1?H+p<+P83?q*aYKl4|;k3>G5X(ewp_X8XQTpWZXEf1%?Y)J17pQv?4VQQ| z0UUo+sk6j}k){HHkAJ@U{drd{;PGMV&YpI+KXxEh(BP;(B4dcyX+hmQzNfIYeM@E4 zbG|L}VqL#3m*I6epRm}}>#D`C$et#)*Pb#Xahp*VxxkXz@gYBx4+XZkg<<&6+0yVB zJ{-b(n2=p92bY=t}m=DiSVh=v#f+E&EEgx?G-+ah+fGB@@ z0OG0>5CIQ(tONW`5cFI?#Ne3nwjlnmavBgBDvKD#I{?Rzl0EOURo;z-;LewN`*2p^ zzARie>wJ}7=U*rjMSfk41%3B)Sg~S#)b-OA;FeHsPPc^&|AY0_7O=u`%dimG2($@- z61Nvbe{BJd-^JVx2Z84&@bM4`22g(;x*-ssLg`rBz@aeY?y`!HTs2*-@~m{Vbc&cB zOM)#pdsfq#;td*{XT|l+9s_py54gB&tYJ`wV+T=RMkj~jia^GBvpd&8I~R7MJ5!3nZOY{CV~*sKP-V_#(jawG?T_39ePd8t&I1>9IBVRJec5`lZ@} zy#)DvMFA6bXQ0;)_cN2aLlfZ^m*=a#l^?2)0j^L)t2&>tOu9}>oXBfg#KH+?*Lk__ zwCj`>dfKf|VXUzZTgboE)CBqA)BE>bhVZCX)WlgNg!i^|S2JE`v;5{Rj9p_PBW=gT z6w7kHD87Y<#VpLf&DLuypPGNe8P;5j!?Vsn-G2b)!9ej~+odM-p!*UKbnJRfFyq4L z`WUf9Dz=}~R5CGs@xoo@QJ+>5*!=_SaKpQC-+P!TO|8$A8}r%GK0EHQ&W?M?{2Zj0 zo112wP6^Pyi7Bcis>)%rJOAD+Zpt)%U>xtuX8$D&AyeRqoN_$HbwvzyoVO<7q53ai zKEP?05pDq!ml#$76#_Ifm+`OyDVOdz0V97TC}Kha3IN7Zetq{|@J1a;(N>!@)+Zc+ z#bWPy_VyOwgQKemM=y@BKb~A1ees-;h;YQXq|wDZA_OBXcq9c!l&k1s7JVPX<>Lt< zRKzc;^m=h}_U!z4BB+cn^CGQo%~-|d_3?y}IIGg8tm-GnKU{qM#dDbqol~h0B^;NK zIRPAh>FVhFA8<5-i(f|=u|!87tm!J^-q|ubKl;v1X8b&kB@$YWCvyTZA49y=?dxk7 zMzaL#k9%G!rwc0$7YfF%Vo9d=hnwUMS`%F0mOTo1aZm( zi1!wTHlbUSdB}-Em~TP4H{#%C7t(_#YJ?dZB8`qGT!{F>&6viGNyS@_Vco$Jcq%M^ zE{qgpi(41lfeXJcqk=AZNlso>I z7c+m#B^`|~*ZH!UbZ` z1JNc^B<~Xe#&;6IoSVC2v#@1O3d=eyYr(Zy)^+Btr>Z%bjK;yS-E0~t@t z6|CyKwh0Em@6Z6orWo0aJ`OWnmRI@IvYn{FP10v7T&%KYT@|6gOxh$G4C?jLWPOrA zo@egl+~yVxnrfZ7Yd#De|B_YZhSfY>)|mmEC`Y`xbK!j%0Gta)L1-0!FjHexvU{mA z=xjO@TlU;z^2)i-i-}Kr2JHsbpGjSm(I2=!S4v;t7Ete0L`zLw}0GTWak?>?$_qD%F-q;uKcc$t?~+f)o$xB%Or0W z?nhA;lataqE|*zaxI0#VWl`p{8r^L+357C3Sj44-UQ+C8HW~D;jF54=32?&hC#iH) zK#P??2P+hkZMQktm?%}Y_-VJkbhqD|b>|nNTX2M89A`JzkcG8vmPDdeJj?27m0#QT zsYS^qg5Oq6M!9~L52F$qDYe-;jRIFlGriF{jeTGwh}>wLMlPU#=MZ<>rjZkdv=Upo zrZJ8JG08UD(=EhtKmf~wiIXHm9AmI|>h9&_{qvV^&M)2v<|1H|#jIbL6QZH2_xtf+ z)-Y{C?(XX&cV@)y?&>20MK|{JF+&uoVLXg89ORlCvc>($z6toyz_!-?oCvS##<%>{ z)q0f`CdnKB1~{C5$An6T2+`xrHBg_Qe);;vk zp}ySCMP*7iXycI$q_)wmO7SlH9)# z@lhd~aLKuGp05n@>s1hX2;EH1K`#f;KYRN*K%d@hgPz)d4;>%32TY(Jz{Yv*x`{2& zb9fuQoAlqF{o_0cJ-qObrb?L(7Z;YmO9T18t1fC+A&1ve+!_3N-+V$mNexA&bCJ$w zdE1^GQjfyi{2H6=ugc9Rnh`fJCH(31)a%QpN~Z@ibn)_=)6bout($XaGI+;4RWp#m10-T5{PmdXu7hW*6DJ8yCsHfy*hnlYx@>6VoL@dhd=G* zCBUTMl={#9Y!{^%?xj3L>5Gd;N?S^;?cd!YL;y8^|BxmD2CmE(wXAY1G>f!2kc6{G z62>I_?ITE7W_5iC19G$s9xK~@E89Cv#e>$byQ|0jZR`Kz{+OzE_DJUebn*wKb7LKb zxeRxkR51YLfx1oTix81XpB87Y-+Tt2i~c9=i+vLAr7q*|#>A?Gyz%>BU#rDI|8Lv| zL7H^K742vXh0ufc9{_ALBm{jt3KR}<;qzx_ml#MqxngN^F! 
z?Us`Cm`R_UNtA8Pq<;l7iD7z=OyYYoiF{Nd2{BA09=|mozg$_`vQsjJxT1&blz2m} zI&cbki3N{{5=d*d%PEfF!5AiWAD>h}peA*H$nHS3Ldwbn$4&_xNzmyaP;;Smz$B71 ziK;A`?}^Qca*Z3Q`R>>>61F?oH({P6h`^nn4&og-@lZ)dtgr$eggB#KxcBYpmrp`s z7H$%@m!>VeoGx`iV}R zCZ;9O2t$E(xP+ASWIZcNxXrY~(r9lQr&Mz)IO&x_FlYsqFf0Kle|aGMlN-7&2Q6+q~X_Kd16A{EdZdG^Dlh1f-%^D0SmKFcII}b zq-08<={B>=lkdq}e`GvMSQ5#|I*ZbCJ|Q^|no}7EcPMil zWGJpu!FwL^Rk=;`qK2k$Gqc+rcv_vK&Sj`ou*&LXmG2ttX%%37g?1{^ZAMO{^dVO| zW*p>jUi2GH88B{Gthx!`_-Jqzim07&?I95YAMH4b;~@e*e`UHkx4#D|!RhWiv_!~M zqzI8Y7%nsrov~^ScmCn^_aA(ML@W|rST4Xi3M5&$a6crhVirr? zOTwjxoNe{grc|J1I(7VxCPbqNy}qu>a^u-WOYm>~1TTOPijSF6kiJ@5dWeB_qHe-B zUZ0AYj5>d~fAjj3)N;D>{-Ks$c-`o)fqw&Q^3fgK~yZv63i zGt9FA(C`%79EQ$Io_vlx`JT)0e_6BPG~#T_qGY8%BS&8Rrwpwg zGPL|3H?)3MLyuzDZ{;IMSSY6C(~i*&QOJB?*PX(WG(3hvOh_Ga1cXIciu4RrIhL-B%fK1XYx# zA~+6Re-R8Fwg8N60SM)%8>IpZ>t89l`WIKh6yh46Y!PdTuCfA))h1tp)@f!*L%5Hp zv{#Ztg(DAZ`-|hH{k)w%5NlElLpe)j-xXDiE9#pT_GdKk>%MNN)z-rXYH-_8ugi*( z2s~i1cuzU3A*wJH+~Z6#fo#DaQx9wS=UV?8fAz2mjU3d&LL2?L^{^4lDDi(<4{Io^ z6jx6+oVOm<_)n9sXFGd0q>X?!K?lqF?Ux*(uQrwdm80jZGCn=iz zxl_fGag{tdclr^O3S%a;dkef+=Ww=ZNSrr0o!9n}46k> zw?N8H9iav6+vIhFC;{8Fh7&Nd+oV^3e=v`uswu4a8u5|}L~KBbTv_h$-;z{Z+gJ4> z*gD7K!^@Kd{L@qF9nJxtFwxY{$CQ|g zNNrrXpd*B_Cirw}gU624nY|%|A>4!e>(fxLg@MNC`NRRCNz*O_6;>aQFqiLX${szkU`vV zFS$X<=V6qfnByfmEE(2C*qhxhvz;SEH+Qt5Ya9Z6-^E>z_B*go_6_a5W?s3jeY4v) zXWl?j5ZQbT>4zQ;fZTL5P(qMlaiow_EQve3J-7!N?yD8#H+6495t5lce;H6jh3ILU z)TU`G+1Mw;);(O%NyUMCJye@hjfMcr3(ySaf0w1O&pPCtqwG~0Hx``S%RL~2#0?Q8O8Tc44$!a{{`jt8Yde%R-zf%88{nWKjJFlFhYCar zX}h(H>nLJIC&S!Dnv`+kYI|(esofGuS{OHB+qW zGpq|E;J?{dg}t!1QwqnNO$(NkwY?p)J#p|`2FjpZhJ~&enwav5f8d>qXvKkb5%|5t zNLbqW^tYE^dJj9vME?%&FcITMlW2rKN`TO?pEkhm8+!b-&fF@Xf&mBUjJ{T&E+Fje zncx9Ca$nrD9vid-WjkFJWicN{)KKlnA0vGB2H(xKI?is>?QWA7Mc40FXlMWd}g2Fx_6Sjz|^pLH73$vagi8U^@4|>NGK&sB-KjTSz5Y z?akeSnSSS`+c(7e(7-uwk8`f@10`k>_q|5`t5*ij4D5U`&nC4*yW|ZG>yLX1Ul&?5 zV1XO_XRDjce=RCp-u5S0ZCL;QH}`(f1r_i_p+hRD(7w?23I%1A>h@WueYHkRTFeOl zpy6#I>TR4Uv7lGe}!^39u{u@te)B~N{&HhLEj; z+IJZ3XN#zVghiNz6+pUH87dsidc8RL7wO;SPnQvH0Th=pTLKdVGBY$blhFkzf8|+U zbKAHPfA^>0Bi%*_K!5~!oqLGeB+ZSRTw~o_W-d1wMuIH15~-3@Z0FN=7YkB^49V%- zTwm_NBX9vMb{G5Sw=j=xBOblJ;O_Toe)0U3(2)?VNYf;muVBboo~kHI6-$(k=F8}p zc+qSx6CHo9YO}nY$s~*Ks(wwQe-eLmTsQUXs=1uOu-TZhzWjCm!}C{Z;qgv0mS!0o zZMpt<{nHIC^F>H+0XAn7OMu^9=U$BEH#Iy}8+yZD+oF-Ilxc)%A-Tf1FC=b%|$i zI1M-5UBWR95jxX%w5C}RWz*bNi*iH6ckiDO^~Kx2JtIjUzC_Z}1x zniN89%f4wl2HT4w{u%7w?0a01gYqV5ZdEXuj)wf?*6`_?j0qUH74y(-z>w$pu%z36 zqg5vc0TU_kg5AC~n+F{0xT=X}O^E|9 zh%9BytJf~*H`r|raPOWs+JP0_12EatG3w^)#Dnn4;Bxl)cLDPn9$?hSp9&tA5nNL z-kP0iO}~dS(bi7T!j>~w@#Qjwc-ZU>eNtBc1}PI5D|z05e`Yf0jHgGXs9%>oRxlA$ zB}-yKpHSpz(6?3`m>DQvP1|^jl6X~aI)jlXUF0FrMjI{zJ%X7HB3$NPfcz;OYhFS= zbq6vj0?Cj34urroW5wEcX0_jt2PHWqEGc}ObXC1DLxDFBRQwnA7J>BHU4#8N4N(Hv z#Lg42aHyfGe`#q+pPkuOE%=1R4kKI|eqYWn{&OK<2(|&$p+JJ~s~oCrmPU*1#V^0| zXbF=)M4ZVYkM8W!E&NH%c#=n(=;q>|kJ$P=6DVtG0`<9&=XgrAo5HN%GZpYFf=nlZ z;>5r}W-NnyYfZFzLN)@3$VLE|J1ZL?IVB&-_L!+Ee}Eb6{Ov~cpsf_XX9`;>Qltq* zE_Uw&rLQ(kiAi}zTN)=b z93qnue>r19EKxs{4bsS}*=!m@ymp<1Dmd}x<;#~u(y1a+*F$bC+a+co$e{BzYQ49m zM5Z=K1;C$vb{hL)CJrT54)t`rj{EOVq@I+IYd8Xolrp15H}nnHmxhfHDmoxpAx9{8 ze*z%uL&9rKx8L*~omB31cZTB?hsLxltPd|i*qPc*t62OH|@5np=Vgy?Rornioqdt ze?V&RYTq!1Y|?^JQrj0yGLq7Kl9BN1vjsCWri_H68hUC#gpNK+(b2c?x$6T5t$&;vN4E-7-`hV<7gBQS1y|!fk|3pXwa2gH){7AVXtfHEXsT(qow!#NN=R**a!4T59mRvF;&SE#0if4O(%m~<5v z*fG{Ym(HP<lyA zAdXAH$FfM-4#)16!U-cDy>kH;nJoOsIX`jTDj74q7|gjOA@D_GCa^u9h!;2znrXN( zeFWxComqf|M-L0>Ne^Ktec`Kl{Dx2fXrE2iF-k!1&B_AT$usMXSvZ&re?-Lu4P2`^ z_DY+o>)q4qiY5*oMPY`twybYW=X^GM4Ev}!W>9|s0@J9Y<2(9tz}u~IB>);<%&Y@G 
zbrpigThp(p;iJ0Z8suSTs0S_pE@#4f<$}W3J^-9RW52xHg?o4qULQqW105-V-hYAI zwxu{~6qB4OA>pKi<&byJHBNwbrGHJH)a2M^284r@nk5-MLct9dB1&@j9eSCm(4T5^ zM}kAEG}*_YqdcILMQiM-dkZn~zyHy!)bNnxTq!$G=~<9``n17hT0 z*)f@|1|@wiDN9s-j)`Wu9OL!-5DRYq9WfjeV3C6PCa--wzyy%1KGfL1T;hK@03rlT zi}_P|Z-;@agJcClrS9soz}4OV&Q}Mi!#0Q^gC!}Hi(|avNl`F%{c1KkUKbaT^8zQX^lJ$Z3<)o)7VC(4yfnCAL_DOYFy zy^MaEXc^@N%i+QasAS%|6-e=46nc2jml19OC;~7xmr;8HDVJDQ0Zsvum#0<%Ie)-b z6jAH}IhWb;;pUIOaJGVvzh)RQqqC!1`j9as2!+u$`~BwMUb$2pQ35FpyJrYPBu=cnxvq2JI%ySmACE1J#b} zT_2;JT45vK>T(q%t`F6+E$a#-YBP|ytIMh_qNI8xaVRJvOznSJSNpa%*ZpZ10G?VH zf=r=QFSG4F*X!^+w7Ww9cz=48fXC|ulvRiS_$vI9$C3==>)=lgelZmY5GBMf`Y~aY z+YcgPIt84m0YV}T5N9zIgYfnPQ3*f0qf1iK+XIaW?N|>UE`sc!qdFK{ssjf1VT3?I zhG+LS0+K(HReq}6`@Is_ELHIIB=4j*MSEx*vjVe#ee4svz;c?W8-LaUVJ4#D@o-G- z5xAepZlymi_7Dj*J8sJ5oa))ei+r$I>0q5|->SOx8hr7)n9_o9iIk)f{I~yn`ZT2i zHAsmr>(*7=Aw&QePjNl_?9T|~4}lEP!7w2Re5ixYjLKQrTK|AT5Pw$pwy3PyepdQf zc+JZytjqle?+ZA(?timAzIN6gU%RNClbQ1$jJ^NhW9M-gJ6~$>B`ow#xB3!=1Xv_- zqmKh=DWNIc5X4wbPXS^-=}(Cm>$0yqqEnV+80;Nay`KV7NU?6k=}H`*=Bw2dAS^X~ z{5(C2C=j7KF3X$F2*Cs&n|!yq`}EWAVcxpo6b{JV&jB@fB7a~8$`z8D)dOke!zS7g zCy}mTIqz&ZdEg;;Dbs1$?Kr`q3~}Ata0*8v!qmWi98-tpGz`={fvhl+yrY|^X-Y!E zbkLS9Eo8U*vmy-UhG}Ljk#GfpWnL6XI}iVB5{4n!S!6v0q7dfYikRHTFdW%IbkL9U z1(9mnz(fPTv3~&`L4hGOgf2TjbNj-cE_N1JRP)(G?#`coiFD>xe>s%fc44{pxqmHl z+izpTpRb27poY#OSninSEiUsAt5%R7V_kI3y0Bp1q&03g>$}vMg-} z-L^bJ80_;0OZE@}u+_Hvc=97p_#T$b+-K>4vaMfGVsA zoSX0WW%V^s?523=NOvliKuYAC4(?_zrf;JWB$D_Cie&^rjPNvt1X5Nrr+&WmoRl|T z4-X-K1frf+4WdyB^BUD`WuZw&M&;g%&%Kz5WCR~wGO}v<*HOfE)QA~P?cSY#XWe7j zZlcA(wSOV0t?TW!w$hHlA#%1azd|$~n&OAa%i5^ykJTCF$Y3=tmhFLmx9+Po`#vb8 zpWgeypk^F=eZ2VfG^zo|mR>NZZx|>BL}!9x|J@YU8Y-RyCC4P0`WHgRaC4F0fF$7> zo%j>rm-%*Etn3NXrk6F0tNnDWE2I^@$jXpndVj8bCe<5!Io*vgoR-g}dB}|Na?*1q z5NV1B8`x9s`vI`?$?Zb8VMMHxhG9F15dwa8Zl0=N8S0bq_~7ha?&X2pw2tHk`#hEq z{oAd7>Ddup1|Zsj1ozb*;@e#CS-w5EvIO3J4vy}HiDlh1#WD&U=79J9lzNqu%?>gL zr++pH(Jt|&wh|i7nR>b0da)1$%A(}81?7VC=Octd_EuMwmOWPn$fno_6Tu<;W;b6z zO3^faKe_|U<3-wl9LK%vf=Bb(e4Q^#`6^we;x1eWXb8jO)@GlVuF2V z;}q&i!yFa?+P$iA3aE;|iFgY9jNu3;;-8YEKG{QsOAzjhcYmSxm#dH)?8OJJzJI6C zDh-X(^<>FyyM?36!gU>0+rh@{ayL*yeXaD>{Zw0b7;N6QpW5p=Y^O?vBwW$1?xqq& zkOnurSvU2&&G-6Vs*5+mCVQ!mgJRRCgR}t}qb%t{p7-0H{;wu%-_?5;`;wXu`g)1* zSM(_5WDhZI`F2gM1(k#SPIhvWUVki*ck{+QMbh!!*cp>e2Ds<)lm#q<6a8E_Pg=@) zIp+gx--&T$Nq}^Nu3bg+FnK}(sT_Gi;~Mi=N$(lsM_xvKO*M<-f#ub z1A%*UkYbS~R&mu#L0~13rVO<=+2!@y*%1qQV{FqE{TpF8%RajjDOD^GI1&z^hDa+6ZxUR@F6JMw?-8l;^F`}Kawy2>5 z*3?X^GBzo3>*Cjet}Q|NQ@#4L3*8cKYx@_SX_XVnYmjjl2vQ;I71jON#KDt*y3Vg& z4P&N|f1}PFW1Pq<~&EBv3{e^eZ77ex4#B9Sj$$UkQ0Shet?AX%sQ~ zX%*z^zpNnTT0zPa6{G|_3=O>~NQb4hdW?eX!+fP6Tjq;rRglH;SqEJ}A;f!CkP?F* zaI?KC$cCe*C`bn*LG-5}Pbcv43Q}^cj{*)Vf5_t36=cyR;U1$Pi<1;&@%#!>^{XI@ zCsL5bGb>2wM?n@l6eJbqDHS9?tRSHu1u33hK?>%Is_0cg3ZalD)31bVIC_eNbT9|L zR6Jfno=)Ns5>g-lW#(x$Wbr@w3JFJO$gERCa+Clv;&{f4ixV{@5X~?IF%)g}dH9fq zf8-c2x)<}Dqj_rE=EqsutsJ?lHTCJ?svr%M&^#>DEWS-YV~U=ow_&VK_D(^TdNP#4 zqmv8A(A99@uN5dy#j(kS>!99x6^ac<+!M$%x|+J;Ig|f2l-^ z7b)p3sVl9|hn0w3bds%cTeXnHXbFhRG+%Z?8Y0#e^B72ssSX?ls4#1s5C6P>--U$r zS<$-GlHl6g+?}W$ER%71UxcySc{uTskJ&t*OlNmt-E+C|@1?D)yhe0`YS=katFG655K0qn-E_uRVX?LuXa zSDHEbf~o_nxqn%h)*>vN{{q>BTr`&vZUGaM5f2puGBz=n0c8O#e>M`n>sN4ZoGlHE zH-TieRZ`Y+Ipwvpv3%KV%0xL@Jb8Eg zaj_JMikqy+R;?dbe>d=CaaN~oRX5))em(o~)v2~)=3H^Y5)HB)+c$S7NOkb~?BKTp z2D3C`kuZi>lQ;#W)X{2t@XN0>TEpUx5hcQ!=+=>LBhZjZ7!}dSgMWBqrt*Xpgi|&( z=A+a3!;M#vC15hKO1TY7ZaC~tU;vuxu;UmE<6Tn+HfTZle}}+*Ex+X;g)q2M{`^lCzAI*-^?*d2$em> zzo?>fd?dj^Aj-dofY3>N1#XDlSjXEiSBCf0a}f&o+XRjQm?%6Da4#c?m4VgOoV0{+ zt)mfHV17NPe?JTpAw~wU&ckVo^wCl%L$nqk*hiI@j3)*mo8H_MdA7zE8n>07^rwRX 
zT`u6H+Xaj3JZt7XEQ#f|lRsy-hJ#ezY%w+afhC+rBQh5(sAD-BA(m(>JHgvoj+AiG z>$W^cWA2x;%qWC>A4}yU%&hE5wrN}EcVw}Y(!^(-f4|n%HqA@#OpK-0NeuTToDGmw zn^{+?g#2N|Tdw*zY{K~(N^~Np=bIL{C_8luz*%#n<8!A+<$sGTEm3{Wz4}PdsgL7F z)W@Yq)VH9%;s29J0D~Y6j_6itIkv&+i`KE&Md*E4+~q~P^cH}LwV&NJS-o6?{!Eq- z>CR^ye={Z9^V!7QL%c>h_8BgT^VYvRMR3q5;yqy}r^JbcRNZCz`sDNbPw(E%8Hz~` zX1W92c-A2~CJ!^M4w)A3BsaaT%Zsec>U>4Kaf}ho2D`dO<^{k;>sBw-y zM(y*%t4_^*QzZwT`RJB=4Zkduf@{4?Tkl#L-{$S5AGQ8)!QdnziBZ#|pPUoMHPi9! ze`Vh35BDIhvvyaPjelwDUFJtFB5}q=BoiY${d%{f^3VQu;>$IXW;bmhEj@?83ZTHQ zz1g)+9ZX8#c|X5Ni^f|+_evHjV}!qoCV&ojmQ|Q=TEG)(n%Bc&5Tq;vfs+jbFB?u$ zQx&o|%r^N&nSvkOEBEDRysHO6W&~?yf6iMVkQ$K5EbLfzMANM8WWzO2!!_SjMN#2Z zzxAlHhp*ieU?0P>paJFpTk6o8F~;*duc~6PQ~+K#2x66@@vB#UqCatb@h6TWbTU3C zLF!im?KK{0cRbFZDR#nIYN3$(#bDT~Dx22t$9(vzv=+AeD zZZHV>0(mHV++l}dDX8J9-z2&=(~xTKZ~7Yce66nGnZU zt?+f7wrNO{Ga))e`uDS#B!QZ0e_^X4BC&lurXY)_-Z`JxSYe*g^EkH9xMeCfJ04PsI+!;7eLUr4grFS^tXtEqo&fAaFenF>?= z$D|G^8OeL}W9A_kvj7Cl0F}#D%g?jGLJEL1eQgF$D#?c89zLK1!TkDoQN>}vhBz2N zYN7)ag2EhEVHwC!egCo#IuW_%h-+8HZVhhenT#>)!<`Mr0JHI@hhTTFj1HgjGa(2s zJ;H@LTn$*)#Gy2;>I@+We|gxxK-?*rhy=J78wpReF93oNE7Y`s!Of_WF(F-j6qJ*s zyKv(cjDX6YZb2!=DUJVrynw2F_U`xuBLw1qoi(dEzv*f;f^ucVG_Ry(T2vR08vmRqqGD`Hra10bv{jAdE~rHgPz(hut_|ckmYslYFoHo!zxr z+2mdVcHFAaRl5uzrwPEwx72#aMTt9!CEO7%&Cpnb2{xE9<1j4=<6`WvE>q0;i;`rx z<{e(+`vBcff8oW(=dR3u+XYQr2UIivAAp}}r_?Dv0Q&D7((FOfl0jmV`@LjD-8>2Y z$3fSM3a<~=N8Pmh`9BQ=F@`;}RV<%s6+if|#6f5QhtYanoaITd9NwEY$GCI)jsf)0 zLj@e2#Bam)brtY+XJW_ayvOkPxw5?}?t=&626D@%Umo<$7Ki4AO+t$ch9+DBm^;tg z#!tC$w_Pw=jB6!<5Zt-zcGY3n%@VcMp1GYx?)1t_aId1LaF zY2af9dh6WJBym+db~I`Whrgv+uX>u1w;D($psISmr%-Sqg4>6pgXloBY+4WJ+{`3T zYQm%-MAza`NIHo-NRv|04vKo4LR&A5f(KKgX^6J1Ndn`PDr<8CDhcC$RviDI@V zKjryzRXh;iBAcE|4CBcl<+>>#j=(%pqMJ*p&J!4HwGJum5BFW()eV19m~po1Nvud) z0oES7Z^zq}s6ms5FHz{Il*Od8n#UO0rByVi-J`aj%hCY$(nWYbK!)U@irx z&Nv`cED_=ocj*0M;oE;RavTIqtei794aPZXAvn`ufxm;a0RKp2HYW|C^u70`nG(Ss z(#o?~O0_dMe9jCGYlFdIj0Tr{YVc%Hkj`}sv=cCz)RLv@VggB`ECE5Jmvo7Cj#FsR zwFKRy*(IC`<06rR`g&v>zC4I@!YFR^2vW<8)KODJ+h=$qCq;iWTItKYi8H>=5o#rv zft2*u75LOQkg35t1=kUjw#;P0hfwqS^N-aQZYVAEdEQ=01K#oK6>ay9h2xw)4hRR{ z!?si_;!5vdIIfSQYq{cMlF)ulbmG>5FDt~;>6%b|M|kTamR3fL-0h$w*?aEX9g72( zif4>I%a*77wJv|XAm8=%1yBUVjTDd2=5$3*sa{Z=5}H*g3qorsEIb%m;})+ZH>2AL zTajcS*$umR+2FTy7YrS^@mrpy5C3`p-UDsN5)ztp zX$DC0O>AK_6o)%dK2!}mB6+}q(Y0SVtLlz8==-n$7aS_w)C{$QGjo6iVOgq&%hI*D zPD1b(Y@byYf9y+Uu9@Z8yeX(p9YEet$xQtzFa+M}I(y%!`-AOF8_QItLZ$j8=TFsO71wAg=l4kETwR{_6v(AoAH6Afv{m7D8t-|drahg}zMI5# zs#0O8E$^!wf}o)FP0_BK(!1Y=K>E{g*7c=0J~pKbFdXK8uzU|a)k^CmepgaI+@B#M z2YTnpn^jl33?Q{HqBZuD?(414eDnHG=GCKyUT-LLb6*^3NKH0{0vJsMxkdwB(37vC z$}d5~!0iZa)D*57I0=xJ3FBfg{UK~?CghOkR(GyJUWJfBVw$vk)f3399qHv=Q)0fC z^2wIkaArn--CXp2$%Gj4NxQ30`whwNTb}@Sd>~{>F2WF7>tHT|3Uw6Fy&3S{1z|86 z*-e21a2jF)rg!6Om5hIPzIE(qi(W4$m?&%c@iv$e1Vnl|jai>@0&E*B=zCE2C~D%q z*<4`da2XEy!@ZP*5JI;MUH4KCW#{uSgfrKRr;EXV9FPy)?cmbu(ML}1Q>s&h@GndV zL5c_@Og(w#5JoMYjsl{o3@(SUQyqKeg=iAM(rIE7eU`z&yCy&GUcdk6rw~=QL@!~J zBjwmTFTy^<=GiQo=Q%r;WrM$+VP5n@luNB2?mkxRV0tk~aW^pq9Y0&gukKefwFuJt$(S=5Uk|bT!YVK80*$E|>6yt3J zvkPSt^ik=kBdoQbs+^Q{osx%Sg!#uUeV@pGwi0YD`SVetn3Zx8C87FGfW=?VsF@}x zVlbGo@ukkZFz;!&ItDH`wf}$Ovy?H8znN)#W|B|~w5|jceJYaEmwB8HX?Y$92?h&) zCZ=sFp9?1?%->WGa51X~sA}z>_dvk0QXOB;Z)i$Cb^r#|QG~6Wf|>b?%nbK)k`9nh z+df8Q3Q=$P;sz$}%q)A(2tzQ-^m6iqVj~&f*Gcfn-E#JvG?wilK_zF(W-lp3mVr0z zKBE;rmO#n1f?G06xqm85Wop!k#&dh%E`iP^a2uB|# zIIQ=x$?bbaB0`W6f=06?Y%$V;M^bP^xr%0s=u6BIJWRjMKHk2UrYp?}!bXBFC-z}~ zZ-plBW|My>1Ws`Tx)no06Q_`Wa~aK_CttqdXaO%jMi?=pqwh}iIRZgKVYiMxPyX=E z_018bz#i*6_c>c<^ZNJA&@`n42`aB*no@x<(|yzXdP-INvY95ZWWRcZ`gk&dnBomW 
zRaWn+Vwx}|;(C>Z&6E+R1eS1AF1;>+2eIL#QMPwC#z!&jgAZS0h^CN#0$C+z5Wtbr zNOBV(I9A)&Nd*LfbA<6(ot%LvW-YFbG6E|ZRoZdXzGxJHYgCY^%A(~fzWMC6YM6M% zH#+cZs8DcRzwx_^8qmgO`NRM<*;Z1Vnb zU%zZ1Z>WS9x3^xPNi@5EcZudUPR{Nizuf9l2;4|~mU&ZOvB2(}Vq_={FFStBT0L)^ z?VD*L8OGzr2R8F!v#ZaTpO?k99$wR*Y~52Co*IMxG?>|BRa%#oQ=Rm#aJV1wvTu78 z1m)H%d4&js7wQqh%-RljjJvB-qWX0Ty+Vq9(Ql!$C3V5@rQNfCB?)Sq#7Vu-OR#CA zES!x(w`oAI`!cj7hs2HTDJ1^B%ICISt-MIS4G2+#2;l&cZ<0=VP|D+8gR@7cdJ|lL z7~X45Ls+0+wEpKsu&@exm%CzK=VdXbUBjoe{q(<4?YN8i-Dv-0h&BStF&!{ncstFS zGC0=4T_b32@phVj5UJw70ioFKW&9k@itxM-&;N{>gucSL?${&l`v;py25^clCE$qA?3tX=nB^YX(}(J zE7vgH(3PI)WtEMS6bh|Qx8-HUJPk?xvfE}=vdF$-!hLYnq2)TZ61Yp-cP}F)oEVs` z>ikR<5)sNSCM8@67rv{Kl8&QYQX&LWlwVFttQs!#^+?Ij1Nh^lgiElk#VJzab4O`) z7|#C4?oz*hxOAFVSz6m1F$to=t>myb&>Vju5W#=B(A{EfK zG$kXXqOmd@ZyG@{cWX2>-PCv6L6O3%wn$;$8yPTTI1}-^9HvXve4jq~?l8y(mkN-? z1{>nKF2M{DCfqdyK6woXhmF}UO(2d?a`Cm}${g~4>9Q}6;ekP5-GkM#HPr)&Qhkbc z!7q$f4W@0jjOd54YL><**Bv#k27;6? zM$1uWjgB3e(cHQ*#!c*B-~iM_5FX#%*;SmhX)_knmavpXuw(y404e~^L1oW6@Yu%| z9{Z36nRLDGItm!s@;GME0mN>V{g`1`TFeEVMH_(Ke5>jsh~_y&q+@TX6*l) zwloEo5v2hXm!V7n6azOhF_X~+D3>OE0aXE~mwbHzpMR;xLn$MP?iaG16)6&urMeqe zHm3`wRJ&8+AJFGh8&O`cZB6Qk~Ag2pKv-nkuvn=aV8dpu(w2gT+T5#CD z42CZ#LVsspsxRR!0gJ25RmOSK)aLzoS>|o|>nXGf9C1-r`R*1+)mL^*vCG?f7gC-o zm`n7k{T5Olnu_+D|7}o=0Ly0Gkjs!1MUr72gF#3Rc*7v(<}ctGsfurdN`Z-SK9Waq z_1EjVDRA-adRMqT8TO%CHNgeIo{M4T(2OZb2U*>MmH=DH`z79nWl*j>834DIgk{W! z8}tm?8YeV63>NryHz41^@_EYQe6zuZ@d0yf*T#o<%Ht;AxXiA$BzbSOub;mE z0x`YS{&4oUcOf}UfX@2<>YTrN{X8g$Cq((qiMxe{U^ccl1la9;W32$X+uB9FM1T)b zA25h|TgL=!VqlU$yua*@?jm0rF0jTsiy}94*PQjPT{vz~)z#t|o*4C}K!3&{#(yByY#!Z5*3*1e-fq!$VX?Oc2>JP&%clFkd8;~&b+o<6D&k1*#K(Db3#2>Sn zLIU~RwCj5R$)1xs)_Ad*BV66FsV$=B>O*6C`c;`5yYr^8D^`1Z&|=hTu@@=68**tP zOkiIzU_nZ43_tC6#RQ>GA-~f=QF@1Z^W#jS0e*ALduc!7>Gyw~4Sy!`{$k$GXfugP zuuNs5JM30SBt(pK$U~mLJ-$GfW|-YOUl-OHWakyqoC26GxNg_!%vd_Ff-WKOes4lb z`ODt%E}B6EQj?=?ux&emwQboQZw^Hl)zXP=L&v@u;2xbTLtVnOed{%86tKZ}Wwq>= z6^t)~Kmj3YuvU{OS$}Iafy-u0rnRiMm`t@umBI3ipQHkVj&QNl42Ai&?xoyG?^kJV zFtFK?HNLsr6?t3C#1a8XZ&Pf`9c%?GPJ?TAv|iZk+j0z(En)|1s(VdwUU+bv)Tew* z5dT}GUSO7wI#*746i^57ghmzDSx;zXH#6 zl#wiDsYRI!c!NeBGsb+j6fpkd!oe*0z7rNpQ&w}I61#urCLa?o&3`wijr(}w zsv#=2f04B7+LmxuPQqpP-P09or?qaTfp(?izZ5^t|1+3MB3KVXePMpAiT?>2e>1;( zG^0cwC#rs|itA9y1Df+8J94#PTW!hlYuWf$M&I?jchUFL z!Cl>q_0_oAduZBS(e8H@PGXAyb`!fI8+U(&Xs>{LNF8}c9a9!+%l7t5)LCICR+x0$ zmRPF7UiV=wd>n?g8{oM#fA}5M%#I49Gh4;ml34_6qoTt0uux=HaRz!!7BnPmllWtwSSzn$Ou8R%Uwn&T%U+c z3W5}6+2uC-xcczV>sP<6KVQDTctg~5H>XH*Amw*mUfXrfs9fDPc56Rj!YjKCD>rpj zm)q8Ts8!L#eMJ>U`?IrETh>+hzpKM?*VAin*Ey`)4RSIJ(l15N1w#~*EN2`EYJyNO zMI?wW%}jJ~M}H%G+21xVJ?+<*QzI%01i^?zfp(Ak7ZQXHCiHrpvm;6%MWm-i3=drY z@@l!ym=cCCe;&q^NF>C9ab{r%s-rYHgRPi7BHBke-Fkms86=rFJ%8FA70StR(dY@0E+DX^5KmX% z(a4u1Gss9Pr-3hwL`>-+EWW&}b`X$)3jY;c_6s=BI+q$2FD}BLy|cJ^5r)xOfM*`u zEYSUg1@`L{BSUHYQ;tkyWOydG6vJ79} zi~Yeq%rT!Sb<{WsJAYp!Q87yw%2$}2N~D3r!XDo2i85d?5?IgEhD-KR`n(}k6c2#} zLc~G=f7UUvdsUtX%3_f=DKdOW({Lcco>0W_P=AKbU8aJPvj)lsr)$UmDlm#NRdHv5J(T+{FJppD$Qt>$c6P%JeN2VMBniR3pqolvb;uZBZ99MUTrmNs?M*w{JL)wZm@#;x^? 
zgqj%Y9{aH5l2vQt#|OI!`gb235f@kILw}%8?$aJNj?^_=A4AX6rVIo6W~5j5RbB5> zMxh+ZF}RWjmXO7V9c9%q`lU`|gQ|l0;8NTA zaJ9Ediolo*`dmkYkC>Mm?9 z;+f#u;JB`5b}7XLp5k;2JVz>KoS_|eN0%{{xcb7IkK_!3I93yNCKC{1QuKSihZ0Tr z>cuZTflQqX;v8yyNbB8o$m=OKV1ET+)JZ;i+^kjK<@lz##&xul_# zK*tUcYs7r?1kCp{iTMCgX#!pd35Dq=Vm<}@n5TuAc?k1y|5ETAj4AXJhAkLRZQ_jG zBoQEZ?FaxH+Z~!JAivOtj1bVz9Zg1}oli%FRC9=Nc^ubBjWFPlQ(+;4yspeM!@`N6 zQ(Pz&lFFQp3mqAC{JxLFg?~?1;NyWIV+LuH28KTegfIdlPVtDHdQVDWmN}q087+!Y zk~wmF;h~$-6C4%Tw+sGrK=%|Vw}hbd^r~q9$bX!g6DZ^a&eZch z3{M1IybKHSh_@m~Mo|8|FT+n);t5Z~P@NDF`r)Txs>JkdIMrY?#K(^95iU#V&=>3u zRS(IO2?!C!B7RTamfffCo!sbq8JM&YG;YAX)83lpu9#}q;AjM3H1-&5wi#UdKbr8| zItpcOWOHBD1V6vWv5iciPQ)y9dtY-8`Go$ROK9DOi$6|46b4}0M`>E@FkN(M}pde zo`6(HDw8xtvU)@|@0>yjWGFu^8+{?4PBBg|F8qz&nSbosIg?%V&SclaJ8&WpB=c_R znf;3ecKZ|qj5HhuN$v*{&w3iq=b1LJ5i@TOxbG#kzv|Vra1-L()K%LB#hYI7SR<}R z?8j<$0kJRJqVc+Bky3k&iNE-36jc|mMHJ@>5iog1CDK5mPihZr;bNVrC^bsT!cfqa9KmAJHLBB7d*|^+i=Mu7rpZOA}|6M=>m2g`&jUdFz_&K)wbDA zM7AVg%PE;5dlNx>kvH=UM5{|LVbMIF;=>s-M)Xfk0m6;*@^BMeh)J5>S=!qnn4{rf4VRDUB4={elWtEDG;XVKoao&Oap>i4W4 zSy}S%pt!3Ce*$EQ?2!#o(r)q_E3w4RUQB{Y<@d0YcYbGG^9h`&5eW04B=4i{5Gp2-A%pS3y9CqVU0|EQDCX^;V9dVP z2Y*m_@dZt9;Bj90^~k6+t*^Tr;0Dr5zj0sYQ^FIlAJcTyPxPTzZ;B@G>Sj9hpbBc! zzw)Le`+5ufJfN^S)LYohm&-B>HU@9yio}X_<^4S9V0c;wB%B!F&YU>1)ubL6l7Td1 zJub^CZ+3rfuD*A>O);qkfp$SeT3EIoI)7M;VQ)&hL9T z*GOSq&VSD6 zeYi2M+WkC;r^boU>~I04l?8YC@Yk!~UQE^uil`bw;7F;X3~{3-a>VKKj1h&7F}zxB ztDCN@D;v@#EnEN1!|&o-S5)nAY~9s;Z4$AI)}{iT7B=6ZgMK1bKO;sWCKSXCM;h9| zVVvS;j8o0zI0YDCa0T0M09HRYPJaocz|}1+i~A$v6obO@G>j>cNQeW*tEMO>$0!P} zvrq5#nxB{QXoNaDX)vLYoeoa=yYTbC*+~PkA@mJTJ1mU^9S=(aj0T>3cG9q9D5Fm| zEd4xyKRzsR3|SfSz|^n=zr0fzqxMsR-?FRRf3|JWWH(Kbcg6Ctkz{&85`TO4<00i) z-&80j_7mD?=swp)JPb`uE>6yegNywCg9|wn)4|hDrbTE}*#0jHf6rCo+*jsd_TWsU zuS!UPjX&SE`K=Gv2Uc_@1m`o@5SIiY2O74+N1u64YvEKRhvi99>fTgOt&t-M%f0Qf#-Q;as1Pv$K`OMY~$n|lv zcw%-VdkuW}*s&4A%mYqGjpIDQMsj30IY7}(y)R$dFF(ps8zlL_(pc4d%ca_EJL~e` zL>3#n=kS1%??+b{vi6YJNWyKF20GTP;OO>M()GudRwl$%r*Lb)B&qy z++8FDnmbOwdtq!xqW^Q2N0*@H{Re)+BB7TNZUGaQFk1o?12Qr&mm!1#Dt}p7bK}Ml ze%G%+_6bUfvvXnqx$+?SSj)BAO^S3?s=Tf(2n0=}NPr7~k>vdObWh_ZMA_NAh=V!0 zd;0!H-22Vr-n$Fl{NAoE-u|G3Cp1$!klt$J3Bg%tv=R{j{IVdDW7(ID zh$Nt}a6=Xu;|?sOGAvXf3?vglbK-`!nsB%k*vQDXSED5V7Y?{ND!159QG_9jS7UCSuq9Q-8s=|>@vdNY`m_YbNw!T7sVRHl) z$YyZ%Ada02tOq;wx^M?MChF@ODRj(Ji8vBn|5pZQqN zobjkr^zXBJmP{ElG1r8QVLI$09NbHA>+g@?T25e>e{U_h}|OnL|GXU$@%&%VSojOBb`(gN%$v zvpm@y*G>#L90i=F4`n)bA~ViUz)V39Jsh)Ly}+1PTKC!g3co{8X_lZgTUw4$zj(@J zHJqupMb*sJ+qkC5B+eV+bO-uKioB}JV^X)QH)XM(bAR(*BpC_HT&)Ewnsf1mgK;he zU1_N)FW_pPKGig;(ggElbG*;1jzPP9Q|xwy%afSr-PAsX3py6-YEIM*Mx>HKYklyl zXyp3fs@e?7;pcP3$y#RnKq)|bM*IH)(XUUdsE{GkLU{&4){snBjI(M}-%$ zD1gN4vVSy`)JS-SYbI25D(&YyeI$uxI6NmUt>N+S3bC zLBsWFz!j4E>s-sUK9)Igb+$yl@fs50I_uLkuC_sjV=RnrBYl3IocpF4KmU)?K48|S z0)d~C3g)IZ1GPGFAwl|C=??x0sfRY*lxh09z<+MnOD3K7NgY6lgyEm|$w^<(^rc!V z7Q)@yzLbf+^Bj~9F)VJgji<1ZNsa_H<3RA7cnsp|wDBAWXAN{D!C1KEi<*&8v#k98 zu!h>gq?)jX_`II&(Lx}88lcd~$K!t9$;>BcVXQtN%fTMt3e1M;+#R?Va#y425>v8c znt#%O$3UMLi1Szg+6c*2r6 zD~N*x({^`&1fl7ZFJ~Vj_1BgbHqKD{I)AoXDDk_7E#|%vU-wlPBLQb*aSca*$@?bz zyPK@BRUcgxYmFQ12m4yqE>pjD_X)pSm(f2S8meuxzmUP;p8d3AcW86|>&zE(8m49# z_kJ@9=xJYKnTQYr2P~BGT)CgLz#N(l6O7XdG*t?LF8*N8^+?|X%PK3V=bh=0gnvPF z(BX-$;*S|a%ClkDoyuZ2o z@cI63d4IF|_Q!dI2??o3Z`p>p*7(xU0;m9>m>8p$;#eP!4qosff*iL1it(^(pMS5P z59ur%g-mFoF?>X_>tY`h4&psV9D4*5+{pYtyNH=K=`o84to&0tjpj1tqj|QNWzq!HJBSc+t<5^I`;5Jt)(dl7_IlqB(l>kdyWm z{Y&ep!KJWz%(E}Yl*S$0Bnl2UI%XYShgocnkvVI;eMZjJHbZD9Ldd~Kbazy1O*83J z3y-C+;LqzL1Vg{U`nQFPxqo7pB72;fzR)2VbUKIja08(E{b4tc_@J@XViXmtyZi5# zpKm{Yd_QlQ=EV3>YCb1H@L1J9uL1^nfAitH7iovmbug`KAbOV?aS`BOm6mCaW~^!Q 
zm?XC)<4`Y{2>)aSaSf-VR8epNlvTei*J2`an&=dCWxk1P=Lxw|V#ZP*~ zQ8uDZI`jwWJWI>jH>LoIzMu~ZmFK#J4y7;?lN;c#)tHtE41)qzCeqr&=7Q+yH zt(_=~s-jJYvU#{UCN(V1=+3?g*bVlsOE=Jkt4B#8{6-H~2h zzHWjc8gU^HfO+U^IgNanD&2zGky#X+lv6bKtjK#r*VyY!5P1K z-^a3#pEYm#LzjL0p*f!Fut3yEO);w?J7k<7Az-H%#OX02iw8qSWw?nq!?QS-)p#Av zHnvoS*#Q3C4oGMPSBFtr3=CHgi9(W)0&-gGUQ{C{LLg#bb5n(w(ZxVP?OUTJNbk5s zxs6(|FpaK%SIKJgHMKu^_+ynVUB%2juHx&PS+Yvwjr$$@aoEHGqe_}aLn%g#=*np7 zgMe2iS-DD>HqHx0M2U7FikR3f#&3-YOrkQHFLAu?vKxj>;_$~ZN}{H~fHTslT3~lr zqRJ&*rM3;QO%TMf@xm-zWt}=t9HSOVeC=7Zj9ig_U5<5>6~B@=OCUS+N_ zUOa6jK4CDovPz;uc+XuodE7dW8~ET^Kby~8?RC6~SM${UxZhxC1iP@4K(y?5e z9ha+M-npn9A{tYXvZ)AE1Ex3sXX_uCk<|Zx8R@^6k@Rt@!m-+=Bv>?mQ2~W_E-G+K zN}}M(nCRo8LL)*%FBcUSsRsg70(t}=t2k{$Qr&TmPW}~?qtmv(?O;_?-IBjq_dW8< z^EbP9rxnl9^yJ%#)gnlx@Hs&kfEgo@c1&<}ee&@W4(3q#GQhc$|B*MnhCh4OfN5EO z2a}U`cVMfS6y$|z2L6DzVKS|d;+CpaPy$cV3|aSO3 zFHUKpB{q=VH))nY>W=2Fz#m2lhIS;}uGZObwQ&WRyIjb5ID1*#UA_#%aro?We zkU9b8OoBOk^z^kVA8ap!jHGg3_^74`NalA^a`yRhvw$5fsIdF$bHVb@xibelD8kcI zS6%SSzUTb1FRmFaX=UFnt@29u*r!!47nZ?tmtJ$z7{DGfl=@0hhWBcJ5BGQI4RlI# zjB8P)9irLA+!&AVG8b4s9Z5MIwGW3>(sE+bN~6rr=C^H>+&Wor8e8rw@`iuZkK1*_ z0(2PKtqu@{{#Dnh$;4oAF|4Ky{&?ljipq-vDE{|>PciV==|g@rbkCWq@&knrzM}3L zK@^2uquN->=|f`$BPsGeZ(E0^|a>1zI3T{Lt3$LoSOVXy7e;0n)$k8P!QwtKC?V z*Juma5GfszoO5P|Bd(N?f>IC-rM1vfsL2Agf@=-jIH-o4y^Cw*P>Fz7^jokDIOR}^ zuC#^9-o?M+F=3SC9jbu{FI&b{{f45^e*j?$l#>tH0tsOb21d06D}gNW%10yYAR4vnwoP6NB-%HoqZ( zOExrcr@Fy{30m>t0tUm4f-6=CbS6SSS2VP&ssO=`l%6vVurl|C38P^?7_f#pe=3*& z0(M|sxfH*Hw;PsHU5;c}3#<}qZB>)ve3kbxQ;$DEzCPFCHE za9%Ng3a>)<^Dn=8P{az4HnYfZ(#zYAA1_u|gzhxE9`*0n zPC;w|MRy&y`f~>!sb4xU$UvZXFkbiaYV+>(`X-!*?&XWq(4DP6-Gnbwo&EX88V}z5 zwqAA5G41;D<{Gwo5_z@ie_pMxH@8>s*4KTu=x_bBe*fXkv(2Y)?$$td=jsB}yt%@= zz~vsdZMN;2O|E53V0FxqO(gUHM?)syVQ{WJgds4L^IZq-sU;ncJ>n62lxDHlJ-xi# zV5svxEBjh@rtF`uy4SaFZ~Fd^A1;4ib*^LIC6Aip48c?Q0MRXp}@UYKM zQg^Y2V?SqIsv*lUHAG<%o0o4r+bDsQWAR#zIil9MPebIiG!k)6AoBkv5HpQMY|}){ zqhyuS3`fuDh#aBVwwmwV0|uG?@ScB!;K(-OHOpdNb%yVaf59-VBGA|XQ(2kdMUKhVcd0oudzgpQq4=Q-zAJ)(5f znv!cQh7!`Ef1et$Xe_8sG)1y0)yQr!qEka>imG1`#m`@KpQ@8~jKxCMv5+=RNK3l` zpk+qFAAs*m`1`=Xq>Xm56k|uXd5*r7u}qKp1yyoPD6rE&D1DT_r(wxEpttiq&l8-TZ}2Wb<*xk&UMm1h?O zlM1z^dbe?e)jCV>ZL{Y@SeF_$5P0x5sBSlf=V}TXsldrRn(;H>F)`DVU6!>yx?|)obqtW$j^nHIsVU-~8tvQw{bqvo&KJ zE85$7wOuyFX8H8iV-q7OQp~w&v8nQATa}Z9YdKy`I30&i{mOa+BOT{e)~u_rwD4o* zhq`-8(z}1h7;uPzzzs^RFlGGMsR>dkCOQqr?z@eBG6|;_0u#cH`4k*X3EyGaI-@Ys zL8qNS?Tgld(pYJOsvIp&#O5#oi9yVX8@s3d>m>D1e_S!SR2CU&;ow8`aW3qNLl z=q48gnD{LALHtD|aqCb_hOzLn6X8`+X4TVg8}I5(yY0`DL}*xuL#xk^t$EB(M>k)W zbrbNZ_W1NZOql9KQb$HjfcN`0ub$i%Np#XZ4AdL`5|!ZeMn*iLF@^kq4RH!CT3Q^q zILCh&R7dp2Ii@m=DFYDWoOv`jaKtwUw_jXMm<~iz%(JHW+s=cXr4&1BDFw_OrNEf^ zV?yF!4G|>N(qlMh3l2V?XoO){lM{8qfw|fFL?fBO68Q8OWv3czp)MerPN4Qh>wqh) z*abvm-AE#i$ChF#_!(p)jBy$BauAO-^}v5Fl1cy~q~-1J*T23UN<-jyW>}krGe|sV z6z4gU&@n})^PijI0)Ub2cZ+Q~Z;CZAmJsQD_IpPeRQw{1I25k$nLxB(dQk^5AGL`ei^l5*c zLsP8T>uk*!9E10^UQ~HLsBsw3UAoga*6f9%MG!05%<(dizAvB4B?9aTe`V3q3A8I( zq;xlNHkWWHe{=l2mx-2_19-V;5d!)a9~Lbz^J!s>-nU@C)KMVYIm1j+U|6etU z+y@ZrfHzUc->ap^th`_5vFaEW2QGi!|3w>gg1u{ZIUr4vVv)*jnORl9l&do!yM5D6 z1efFM0w}Wb7TY>|@U?qzq3$*z4afe zF3$V1fAR#_$d(5%I;60U&d=Dc74(6I4wmW`u5Kdr{Y!YFgcg;)&eSj zSxb}KHVnS+uh3EMtQhejQc9DO)~45*+L9y5`{=JPKu|AR`$)1U zlMo1g009t$c=fB~0ar{aErR7H;5=q&q5_>LCX@-5>)@YIF|&v` z7fJZ8&JWwSzrXpgh@?rvn}y_|UA0wz-Mn6WTK@LxSDj6yWt_zs?B&F~tKTBi#V^Z? 
zFBcrBaliv9G;EZ)4VBWtYJc(1r#M){$KQgO$t(?S9qT>-L|nmW7ks$*lfX~6lQL#X z%PIK37s5D<3z3F3q2vY>Wm%HAWs%?+(||J)f^qHF{gIcr@*XsmNmy;@x7}KQ8tmn3FAa>mlaZ}YeP!l^zw3EkBd&b&+ITp1Sm3x{)cuSL_T-!Upbv{kaE8DEg}~vbyse9s^<;nons_uYzj7K??G~rT?TTC#%D$yZUao1ds&>bH zNn?lj62^%PyPZxJ&wwUTLJy*UV7xB&wrq;3WQ$0F%1Z#awT-2(dEE_mI~o*D@z!2j zXJ4SW<3bktX%5a3c2#_gxmppiMRVUDoVxas93#objv+k$#9k!*)ivh z3%>Xy2S5z*0Myh%yI9~VV<|!wG2@ws+} z$;`Q9%3_&Jl#KWsE|oH)`n2ofp2PwaOJg6B4Fqh$B^`i*jP4K;cQtEi#hEbK#5CUw zf=;`EC{I%vzAqzh8>3muPok#t1;{Qg0jNYcu+dfC*fpr3QWfrhiho_ErO!WKdVZ%I z49W^~HjS&&KC<+WH;a^q%irI;Wkh&i(y|Z7qP5XnCi`&6>&CwBV-i!Lkp99DBHZQm zwQX9WViKp3&rJ2cXj_VX=`%H~88SLh`z(hBtceC;G3O{!GCnx^qj@Xpm?z3$Qc@xg z1uc;xFf21McP;>b2P;#!jit=EJsof5E_>mz20~@v<6ac++S2Ib#YXAgImQnkw~wXUZpm$C(y#V$P3oJZpG2 z>`$LFuP+2-oS3P?OJ_-$f(B5Yz>Q0jwQW{)aYe4_8S;(~T{!g>5fDxajTFt6w=`PS zHgC}|Du(0nT*c;ok&4h$zZ`8no@1~&yc`lFxb-AK0})r>2D)X#rA<~h4+xKDY{`}(|=cC z3gS&9r}#dX_LeZhwKrM>?qqm>Cz1WJZj@9$Dzrx$j#NCo{b_wfYM<@8J|3R2Yjtp? z6>XT)_TRVfYG?EM89O_+2h??Bs^gWf9f4p=nTD@cM#Sz1+3v=L-eBjZVm2w4Y1n$Mg@kN z-d*KLniyA(^1-(l_v%>e)=y(2wguct7I@_J{dp>gj4RWBIO_O7k-)$?&)cjkEtrwTXVoZh>LKiIsG^$9NhQ|3dgqvb^zUHFBbB2 zS0%Z(?~C+X$wXF&SR=J=UCcmf_(pp6C!bsh;K%z$G%*x)qrT`-LaFah49P#d7ww#CRf}-D~T6 znzOy64Dxf&(Y@{v<+!}ZIe9IP&}&hFu3{}Z#jsSx4@K*nv=B`UZp!Jy7(GLO@RTY` z!A*EwPzhJkv~~LULX%PRk{;8zihd0|X7gQjT^&6cT?@(Mb^^|u7VN(FNM}uiec#k{ z7P!HtT@wC;KdQF}k}wda z-5+Q(Y&5V*96|2JLMH<|Kt%!f^r#0jzBB&^PgSxFm!aeV69F-k5wa8lIW?C-f&(d+ zUAX~J0wp4suekv@e-X&Gl1WYiwN9eV;p*4VJSt%E`-n4@rO};dJw%EbAz`+UK3@Gy z%FT}x)-WrQxpKd(q>bICt5-6On}-TWZdsNjFQR~ixwh$CfucXYCgaRaDZyyk493~i z$CVQC{c5eOiE|oGNJ8C7Z+`JUb3=k?4liSD!h}>$-GfI-f66%MNf)^kJC`YqiW03E z=kh6;S^+Cpqjd&h0!FbYbfPFO3<46%5}QShi?+|`Mo2X9=$X_{>n0!Q^-!SEk|oBx zfB_#H2{9td0B|z#h?Q_@VWI>sFL*Ng$!{O3ZM|9>Y5c!mJO+LPB(2sm0pj&FEgtg! zI-2DbjqZQ>f2N};K6YOM*9bo3`~xTi}FMwMZ4q8DjGV*Yve%1`*H z&MvQty_=s|_uaAYW(WIu7w)%dT;91QWj1xyc9%w=|2_DJm>gVIPoII7z!TQ9dz~*W zi0`|Zn_a&%Gw-%c00!UzSPCL8OyVEMd%3XFO=$)pe<>Zj9Nx=^qZah%+`Bmpg>y%o z2_FR@Ox@EzG~tA;J?2g8Xz7G({@_b@+(0zM+r@0zl6fwT*gZN1;uc<;Ni`uch?NM3l{000Rl z;dU9=J>fdOo1o$noRgQYTwp}+`Nyys8Q%Sr=P4W{WC<@l6KFw_jx8JPO# zN5BFgQ)}aY6F9tQocu5PixE7o zD`50eavB0AN0|ip4C4F++#9!bjjJ}Gj?CySWTwHj3lC**P7h^VJJeoiQEu-6pG2!G ze@7eed5{2LK3uSW;zGNYh;;Y5+s%dRc-uHPlRBV^>KL!9k04nR&cNB;H80B29=4m2-uo5MuFy)et$#z&UCQ>t`il zf$-(S733e67k}%xa#=o%@BAnmi(MmA*Z;R#&72L_%h^NNNXM&lbA#~ZJKaB0&aYxb^%$B}7Dk0Y%I|2^>(HI3dm zajC#J!a~Bw6d=?UakzYgI_&}#!vc7#7Fqmb?ROM#mocir28wDHqi{PMLYtkp)23St zPt`1+k0&j3ORy7bWNXeXr}~pEf2T|nrc(1FEhownsrh3q=SwIsw45^qHX+J4i;XAP zF_|=;z}r05b^8B4iQw{nPZIz!aCA)x3FMpmQVI2?((>uH%24a|t$iurjLF2jY+pK2 zblR61cxY%dzqv0(F+$6i>r4N<0snYk3dV&Xojuc+1_%jg#mC$cXb-#2e|bZSkn(M7 z<>B<*qfIHq4KWel-5(B@{?vR&Z-d7u69Tx0ib8D7!^$Cg-HOYmG zZKc2CU))!Yhdj(w63(DXS^@MFx9|8{z=adBj-lRId8TLX((UegZ zQz_(9b&et@e5BIfc&W9_f0*EUnpCNqgg~4rGtUWCDB0HvqtNetzNe{g199}Z0w(3* zxGy)pwNzr%7IQ3CeM*JwW-+@_&BPZ3JRXXvG4K@py-QeNvFa}D74k2ea9kLX)Jtb<(PNSd<6b_f4gYVld|!X9`*t{Q`LN7DVQ|JP zc7aq0zXtt+s|qS{e_`ND$D53$R!qK-1Qe&trs*6N_}N6pxAbX5ij*E0C-eZvq2oXj zwRA8k&{ks`|8;KIkg?A%BH zrRnwLGfoI{f~omEb01*tsf@dDH<vi^fu9;(c@yGSg7gx3z z@799x!h)>0?Z@^NZht!a;ri^&nSfcIfovz4z7iR#^KY3P>1h=cfvE)rasc zd1vZNX_+%(@J<{0Lt8JF%9*_11{&bEG-@{VEa`E-P$F7I7Ju=R$W2`gXL5*{aULx6 zPg%$_>lp_N$i!2zQ<>7J%xcS&e9~)L!B(!0ej9`dc#2t}BSmpx5MUj%uE^SuZJyDM zaumhLr|s2BKGN&4K%*tI#=L+7KlY?t5?KMM2;Rh8mSD6nnN^Bubu6Gif2udlVris_ zZ@PT`Wyxh5_Hfb$(_@n>gZW7Z{9D(+|jG1?)p<}lU%3+VfgGhee%$o zLulVexyw1-!7xD^znc`%#g&;ZsEpX6=sZmRNY{(RF*;_ z)ll9zh0(y7l3woz&}wNKW01bGd5w#QJ>uC7%=kQ{T_nWjZG$`Bc4#15EHyXzWxWu2 zxm%JqgrJ083c|doVsqOZKtZTxbYQe->IW!w?~B1A45Bdk_xRBHF4VXE7S+6uZk=(6 
zSIj#%e1A^4c6tQpqqv_x0-Y5PI2WIGL{*bD|GHdwkzfCE`HIcK@KQ<-FCv44Vn87e zoMyg|(||3Q1K@fIPD5FStIy-Kmr!8fw21=seYxvqNiD*aev;H`VIX)Z=~p#kROUM+`Jt5eYpcapZs(o znDy=%_KV55SPCD zD1X$5JAwPG^7((7!|isLtQKIlEs81im36rr=+A$Q{%paVKa$H3tp|Rp>km)zU%{Ev zenL)uOit%C5#FuBU5{fha$ELErRW|QRQtPKwW`vF`-Opo-5o+Q&T?kd39sWx(iRY) z@^1U;m&;%2wGLs8BcInzPgBX?^ztD<8-Ec;Wxbx|YVVjk5+PVWXEYSDb;mmn`ls7A zZ8~og+Z!0X7D~^?9T#I)g9ePKjNs3coT1w>tSVv6lM$gLVSrkX@rgVDfnfg8+ z02Axbt=cLbx~9!#+MG0Vk2Q0Yk+?a$8)Aa9qx}4eW?}@Kd5FMlAHq`uqDlR$1q9}N zyF!=G#Q_z6FPGg1eq>X;aKzw=4g|4e^iyqKMdT{?;k4jQ-R2v<0t#j zZ<~1ZnvPD;1OVcN%dcY@;(UcruCWYz(y3haUCKd!p`gC?)UI9i_aYgjI1~~+n;Q&i z{ud~@?C!|qRfXoRX6+Nuical&TY>^Jb{XiO&9Npa(s1Bk2jmPK^;;|h9Nm>|U#$+i z5-tX5+cSA!3IK2aWFA;1%rOnnRmMWtsuc>}!mBK>3@HzkgWI_}0bu!Lq` zD9{Ffr+9wo!sf7}WvnNhtNM;6=)RL?+v;{3Qx2mC@PCCc5Iq4q!@Cm82vvjf>JE40 z{-z7GPBj68RReyBrL#QJ@HkzR(Nx$T1-;xO*iw-D>M^cKV3sWOnO9)z*tfK zA*oo?YwCf)AMJ7-VvW$*aEVO0)RvL=aVjx?2qM~Q6}m2TBPo`oB<_G_)=NJ%6S@ro zvu3x8W$23hcGujL7zxi4|Fl(kvkSD^9D0l%EhmI2@@_TgMhgT%#aMn?*wDfVD&zZ4 zn@JQ>GelQnSdMm-cL>7~>Sz`z4sjecX=rMn%DmdVr-evCu&`-El0`yKj60tTssu`Z z7eamx@GOUKrf!bNAl1N4^a4h$X%yj6YD}(GS}Y^L!{ClSsn=C~OEaT1NT?5!gauL$ z2q4un<*oqFxVO*2KDOz61mYk!F;!rx&?R2#PssV2+C9o+96?GR!v~dD(_C~6ZtD^I z;|M$b96@VC6#}~^K~1}9CFUyZ-O=lRIHK4bYL#Eo7pv&c`0Z@-cT)bRp~lAMYijjB z+W!d)w?%pMR#Qjy0h>bI9okr!UL9LLHMD&4gu-zqLC(V|K_Y(HxlG$+r+ z*PoEO9MST3NMisyplM5MDf5P8bd(GbfN={lBqQuUeyYZ#m>9x2xJ;f9>b}HoqIw#z{J4+qtm8jP{tItE zUj~<<Yc^FmnF^!g<|Ydt z6Tuu27{bze>BrO$cX$Hz^nW_i9bXQO4$LWdQ5j7b7yERkk+y^y8kata=%b582}v~9F1k4=%^s~#QQ=|GoGQRWYRJ4mRrcWRLHhA{B?Nlj7pki!)nFrf56@QO| zjORsK*M0@PedjDOqQg{hK`cb%lws)%cFVDJaUFknJFzUD{qpvlyr3XKF=mqO&nM1F z!sO8fNkP6?wZE`iLyXifDM+25xUnSwqJi07P>`r4iRAlbvqVX8L{XAS;$rdZjkpg0 zges(%AyECi&p-b5(~oa|#zF+jq?Ox1o84ri;RyXg^Bzbzm^)B&yWClB%6XHoL)7Ip zjwVjSJbP%evTnCA_i?+qnfN{m-*w2aIoZkcq2g0SDpE=ad&pg=%q{dWM?jxy6#8)J zVlt8zeWAV*`Y2+UcC-vr$3h88=^6 z>($^ujm@kNgPP54k=4hS8UbzDaJ@fE7!{D8;^C0@wGVCSM-WDfymT>xQidl)ONs`POPr>gb6|H#WlKF{hMu=T8l zJ+|LrJ^mr^{0uy;?19S>o_A@n`CpuN6fA*Ied@kDe&uF$l~o6c>rvw8d6`y^zux+l z#pZU0_?6oafzkGtzMlXgWvi}Y!jz8J@w&|5^p0^SU+P0Ph7s(@zA?3Ui=F?s+ax)ucSB>=Bx=^V{+<7ZBA7>lgD z2FNhN#ww_PN@h#MMd}zUxZ!uc0j#beb7t;t57hTN!cfdsIqkN=qFbm`edb_=4IaGX)z5^%pn#HADTP=H*=ku$M^ZTNfdK8A^IeC%WPD7 zi&tnf@Pl+-bzJhsk3F?u)~aRwYlw);wci0%V6mxxa;!YK;qD_nLP6!_y2|{*JS~d6 zydG0!$^;-JnG8*EOgbi(K%?$F?hI-nUv}*|yK#`!>OPRQCf=_89bBp7BLBy`aWq@u z;vxHR^YTLqMtIm?FlZZB*vaZF^PE0rJwYS^sBDSF`h}Ny3O0V2XSYoV5(RZf?c4#I zn>3(*GjDb6`&vCjaNAmBbnhbT=Q&g_h|-aVF5UZ|idRlaNGN4_{B8n2TMB{6BCF?B ze(TiP_JyAH9ce(C_bBj)T>PSX;UdJnu2*#25w9rFGi@4VZNezS>1|EK)ktDUyQ;N;1^lMuf zF&c}82-+s}`%xB<`d z?-Ahvx^do5V_&V{;+LqM?pdzODL`>wWaoL>}SV+ z*_d5rm!aeV6qi8{0TY);(E)e?#+Sg+0XToqtvTTo6$+t}*?RQt2TkU%_$i?zvnILs zqU$6hl5u!iC0|B=qI0o1rX|r##m;>lbDb8mW;^C4tsXnTsU}vbvpB%vS!feGP#4Yn zRk1EM%^QqS98txIR;G#nU-meLtSoeRs}v6n{md{rh33v*im0{=N8>QagTrW z=q%%)gMA98D3u@#jGx#uO$;S8%fhCQ)y6ux%n~I8p_HG}DKZeHSr^Gx8H14xD(yMy z7Fq?`5|zoMDw4$$zG2o%mBCrt)4uUXemzzw1t&_%GX(HwBW4#%EI7=S&u$KzW-?K* zc}7)NQ`a9i3rGdYrGNYCbKv@rb$)+|rk9r}bN`)6TEZWO?{9I~yQ~4r6HHCi?nM1O zr^Ip|7JH`ASWtX1o-n8;*_qn)R-54BdOI%{4@1Ug+f9wmroS~0x5e+lF{hSMeo$M_ z@>PDMHm~dQW)si#c6!1+TMo+FXMDgqJSKt6=M2pRDuPUS2F4n zw|mn8$pZ_?BPl?u{{Z!m-`SU;VcG!0Tf3DD5$L$Q5-nOuSKQW8q}V=|>+&QJ6}D1(&^ z3f%*0U$g=Mno5Jx1dDG#!?d+hD3gyv<43TL1uDr&q|_7m?`tb&7YkC7Mm%!a&B11- zf(UG9H0o;V<8{6$#uLSrdwSzi;A5M0F5}>B3>k8Mj@RB@pZ$CG_5(h=z-5u;bKJeB zif-Y5huJg&EECL9)MsD1b55abxqsOMGdEvQ&@b0#<4K(CDb{7FAG|(DQ@1Hi-IRHj z-}}enGj0G+HgnM7p;+Krqd5b>eytxi<~OhSEhtWJClK;q&V3z|Yox zyS~Mx%KkB4e%{)+Y3k>=|EZ_4cdck@xHklUV^-u&h@IeFB%Xk6|>HA;0|nm 
zV?>A!+peORbQQ(0qI$J-WFLj%j=P?p<*4uV#W-Q%^!DPC;K6j6c_@n}pVe96FSe)I z9EJlJR%bK4@^&Qv4;o$1E*o_YLny1OA&YoQRJ=EtVe!K7RH=l7ujZ`_D^L8niU#Fsu5JTQntj3SzHuY-L#hz_gCChX}%d?shq1 zpEO8y7w4v$mDwk>ZBB+L2=o>^T4L|qt8psBqMVtzZ~=ft>&6R5 zT)2eKY)L}@()QB|4_y-$Twns&lU->J1(8LH(721ycDS4f-H}aixCrkXU230@&J<9F zB@LY^O^73b~DqnNpXL8_$`8U(1CKq(}U9TuV*2H5=#`FVR_wu=n3ki8GT z@x`H~6V5FZSYyb@xNk6jYX=gOdnOMJB;Z%cj#X@jSw)g|q)6L;S5|&yhS`@?L`Z(R zz+qMjeHhqSd*}DRZf}PKap=~xlSB~Mb&YmcdSO-%c(%ud>?3fYrTf2R?_~=-+~}$@ z-Z(voDaN7j4Nwyi**l`F_v z);F%fdvjg4oiZhQw_x=B z2T}pV`wC@lWOH~%c94lDkcenxpfQNIAfSvO(k_1k@MkF>El8TT zvflxd0e|*@LkZyCb_v^RhdL=_2?y_Bkl)AA8Hi%m`K47x;FpXl?E%#Ttpb1soV1qS z{1s?6^|ThU9~j*MwW}_K{S?9Wd%O2u`W<1k6jn^YrU(&%-0Qaf=W-5*aA+=F1AZIO z2s*E>>|mQLx}Dk|BcvNG-P7&ITrJ$NQw!&D%YXIEg`#)#{EI_u`YA?&(wEL9tLLxl zOVhv|rUhJB3WrITxNR(|8es1Oh+4v2Hfpo_spboIDzv~HC3s^_b)!cb3rCH8E;H%o z8cqx52=1+}uRP14tTpDEs6X}Hv6~m%KL;JTJi=x-B^dKQgoaqbzX!*`_9ZTcY_fM- znt$xMCk-!ikkdNv2j>thiQAF_o5=4bZdr@ql%?&&-c?|zpJ5btXMb>S?ySL>K3F;F z1bYm9OI?Tr>+Y%T?UuVz>sXlnw8^hqko?xQP-8eH!+Uf~eN73c0vpat1?W*a=R|B0 zOm`?yt{JIrKjgplG+Ej?%B;!}R0LKjbAPW2Mq^_qrDa-F57pXr>m)UL7ogbby}O6 zj?&Q0BSjV5!StccH;jb7k&#duqlmM!FcPhcbAA{jIfvnvkqiuH>(w|Ti4o<4_X|dh z>rdw-gbTzd*UmhO(?Mx{Gb8zM2qUT1YPD8pAPjxeaOg79 zGn|+EZD>#I*81r5TEFLoa(J#M0`a)r;1I5fd)lV@!CAiyVLy{{Wjo9Rrg>o2{iX_T z#M;uLyV_@E7!ZDuTdkuNp1?84x?}gl`|B}~kcBZU%?X)u3f&RxjSMEXG zzEDA9UKNFTj(^O{hj)X(c_h&{Z>V-unE!hitRWu=!QuHoy3{r|m!aeV69F`r;TQoF z0y#66A%p@cm-OTTPXQ5^MCAdI0j0OrlWiZozxvS= ziP>sk4>KZms+1J8s6Xjv>q?$L*2(8sR|?6v-@>@=#-n<3v*bYV9^mX)RY4Xp-nes$ ze~;4R6SiH!z@hz~8j!V0gdhM(y@Y{t|H7&~(t(Onrj5C@Q65Aw6H@t|o~oVel#zn) zMoe>YNvDRLO0Gut4ax*O#iG)Qp*S!oh#r;mutk(TBmr~fUl}o2Z zUX67Lv}-X@-#s3D4RYMam6oEkRa?}RXTpswGoWs+-B%mO1s(j4MR~a(j&wU#lcG1Z zzN@EubXdHk@}vY$D8jt_7$zWwYZZs|wyI9d9KpQ3JrOH)N2?Vh6gxtN@ zkA1eMCVE+>36%rvD9XdW*c2VTfr;9g=s2+Q)yrV@@(kD)FnB(=qKmE8mPIp;53R&A zc0)vOiF(#Vu>Bg%wr(6jSHNS+e?#A89VMP4@fD4D!}Za}T59AnUKQ9R@LExg+<(c} zNeu8Ko**Z2JVs-Nxe6B^NJv z+cY-otfL4VFC6X8OBLO2X$N?}hSEPN!c!)@u$=@kY?OCQ15&ff_Vs<;f47$xOSvnm zqqpEo%wnOQh47&+E%c`mzUNqYV0^QMtA>!Y zU1ofcOv#u`jqQ2_9;9;=8)!IM*Ox0L@%9VfAYc^W&C79C}IwN zh%WP)IAI(})Ta_cDbdI3c?sdf&?zB|4U6GY_{oG2)c}$`A0hmF2mX6P2tO93<_SUw znTSa;BZT+Xe-&`)=Dtz%*<0r9(YKZ&R00IN*%zHC>9rle~L!i#;X&xSeGB|0XBaD&4v?BQJn=@m&}%D@7~ko8a7`iln7^&FJ5$+ z2%;E=*&=y+_D3+Se;hN47^b_Xy~&!YxZgdNQ%j`dCpnfgPAKg~q@1l5_45#te)t5a z6KoTbDsR?RIT{P2(q^8ANjVZMT`xz1rypU3r+F1NRySb;V5I${37nP-(|d+}J44 zhG;md%9EQXYzt+pEaYOpzf$lWyLJ^S$%$6#1Oa?(45h{r2&U%R7dQu}nM$-Ugj&&F zs}H}FH>=TDapj*v(SPtoI2wO51!Zw@5w>shCQN*_;iOzIKVpI59BT>3w!pfJSs1nj z21brUWd}q4q>B1=PKo2ZzZtB&&hLYkXWqDTs9D{yF6#F4c^1!)ZglM$IDED$>*iqy z!t(@9VTul((!{WRUgy#3?|)q3X~!_BGGyLJEx!i&&Fq+}(1=>|9z5Xy(3=FA9OcZ{qR zbx&oQh~le$QZi9QG@^U9L?SajZ;}AM!epipPB#vD_mL>Z0}j7OZ8*sDNo@FgO4Hyq z;OU7G2o}c!s{xF&%l*q^pa=k`$<~;24HErwglw4Tj;WzKjViCL*hrvC7XU9)-Ec}Q z`t}Ft7qdr4K<6+s09ND1uR0Yvs~hIHz%1H`is8~~$%Fql%E zn0#K9b-Z@hW}2{myhGv;rS|5_e2;e{SsFDbEa_6^${*mWilB8u`wf1Ti|J07%i)^~ytupA ztA{y(uTo|DPg=}$^ueahz_eI%ppoI|op0YUV&||U(UKR~wfYWNfK*7nTj@zGWqB!u zw|3W9M{Rfm64e$C$CFEd24Z)z$wH3PC=(rr9FS%z_;|GRF^}!muh!rv3tEW-;WQ!0 z>e2_)6e-}Z!|D-er{Emc*X|ULWhUg*Qw+macyi_4_8bR@a%C{}k|Tk0JRgp8m5ORMT_=vJJj@yl@G+?QH6(!8fRO~Ikd=T?YP_<;7cfwFR8t@Ha(13Jg8UdW?oW?G%z9M#YcR7_-ro&JTi?; zdiEtrl!)~XdGT4l*%*CMD>xkJmUmL8h~Mvfe>B zJmWGMwb7_C;(hunM^)_kiQ4El{gV7Q(vY&`*qacIN4IUOr?BxktdbN4u2%HuaPF+C zZ5cGYjq%1@`l$I1FWpiUMK*ctihZ}V3_w`GbD*yz5EMbd0#Nd=3?PCD;b@fM#-QsJ z(H^gg!yefJ0gWeGg&yF%GSMhbVTe{N$(wT^91G;dnoN>Qmv0oSirxtIdPQV&53Qqs zj6<(wIvtO+*O%xjBa7#f7}Fy!)6v3|4D~O-u}70Q21=S_&J1_v&lA@bATy 
z>xpi7DFf(A+0+SCbPJ8wq^=_caQVgnC;~GeDFCf#}lTW0LIlyo07t)&y(4zb4IUVmed%tvy zIg)ts#Qy2~5>Ma$(%eydULpU}xnSj+d#_Zx`@S$Tv&_0%b*o^b)R|-&r2Hqe#+Ai_ z-++G++K2YC(|!|*0Z$7SWA&&)gk6p-uW$8{UeDn{;)xFTqwk?nd;P%AROCNK!;1FF zMY2|X_JE*;1n@%cl{3lE@0@uLXmMuh>X_(AX0&u%8atRI1@19Z|V)O(<$aC zWK_ExoKY5)Ri<;r8#HnLvlZkC^F{F`GJ_Z+aL7wp(lx|!W0~Z)%9wJ*mS4=0xC$Cp z{sw_~1Y=ZMvqc@qL2K{K)=?rO?(A5+xmMKWoB%{>rOltJM9{*D;QEVy=)dmuV>;)g(*ts$T4a6M8HE-!n@ zRV`4$kg7}}g$>EyoigfNsRBu;iV@*?*E=}ZWEt+l32tz(9@Na?Po%Wv6`cPwNgQEaH*1psUxu<8PBOSDVtcsUcXG2Z(WC5b#sK{AE)=c^&j zspOa`heST3#R=A%Nfl+L5ysEffz96fpeKUJv8YxKW*S8yghshZN)8lJORwz0ePy^D zUfWl!BjRSU3&9#7U$Jnx*_hL#Ys0nj0DT?xKbwX&8bv=ZhGY=y-BZ8N&;ZP=V^4ZP z3ak<<5gQ!~iTr;+s+o5@_h&euxGV46N|g7&ddK|3Xz48>mozJh!j8A}LI*=G&%wJS z_-Y8E*x*$dy8;?Zdj0A|rDtrDHGIE*oZBN>&vy@BieEV{^fFaqN$AJ<+aomb*oAie zK^6{s`}6p{B{;`24CzhSkpSM*;Bkg~r99N=8O{wb+KHC0@$v`=VidFhbTB8vq(oOJ8SGQ7dmo zuy|-P;2p5ch%gTmb+TIq{4to06u16?^vUL$z#+WR3rpfR7@Vj#O>x%xj4!xa6aDzF4D4l)RMyMy~Z}^{i1{{GF-v zL?c}S+YDOVW`L#ypfG*c2o2F1YygLE`R|OcXz`KJYJvOGMTCE!`|QhCpFvpoHxula zj}@1$W@4J&&-=+rG;{B2#d|Ege!)E#=My?m%=uar4t$Gf_K-W%IfiXYE4VQ-qctuq zbd)Z?5dGqA=e%kyiSh*_c+Q3={~p@m!NxXL6vR0X0wAO(1-Re-hzN4`h_xZaP#GJ{ zH!n&y*Up5 zpbyh2gTd+OOVfxPf(N(rJlmc)_U`-l!Myq);+CLUo^})X`0ClN#83Ecxsrydmg$ET z)1pvbbW_#CZiW-@8E{H=Q4xXZ+3vfOeu-4YawA3A)cbkTSep%a^!zV1JKQl+tr7#~ zCIGF<3qpf!nLo(%P1FowEaR}-PV}9d`=(0wUigHO{UqHowbB`%m|HweQNNNh&%+wD zMe)zR5Sxw4Z;_!`Y*PpJJsR3ZX2XFFhICP@1-a!w<0kk%eqmPC$MyLA8mJr!vz=#0 zkf0eNR=kp@ID68IS76+y>iIvUcJ5S5iGn|H2+vT$_YqQVDM$@B4&4TBK46n*(#L$v z`IFG-Fw`1rpgB#o`Yjn^IL&V? zoo@9F><%D*crqF)0WxoiQ?KD}B9aXww@Uq3k4PA0!NK_Y3-3F7TAdrwPGMK)&LSx! z{*KaVA6-{LTR*{DYyzTmD)S7 zMg-tMw|>zp3Ojv4`f?%nmCt=xy+8b535OK~bR0DE2o)Io-C_{B#V&vn)wuB34*+;Ti4R(`u zWjbs3kHW%kvmSkYAC_kLzv$&p7jpvOsTO0EfKeN`S=mY=ygOe`2tDlSqaed3qINvv zn+qhCOw%|+)#RPbwWAoSITmpr>iaCGk>+^bk7lE(`b;HU>Dmk)lI% z{G>7Df5oO37uv*(=y8a@NA@`OXl7Fa6y25<^B`F?)brtxx7+4gU~|sr)8Tx148(bNiL^S5J)v22Ock~ZU7x$9{@(cQ<0#B^ck?5g^)I9iJZx1-<9^eNumm(AzTQH z9WRa>zb792bt|`7p70BVa;MwIoBYIwnb9_}o0;}JThLX*3vG)_5W}*!&&%9LuR^tA zQ-(x(3(VWIsWL7y6ItXC5ShAjc3Lm7x1Z+BVJN4%UI~wzktUb8qY-y(4M2zfc~jEu zQhBllNQ&&3r0bz*M%l6K(^>P1ehI6G)5iMpJYPW|c@zX?RWp;Q#+!~DOlCy&hkMPn zY5=o6^aQ1sw zc!IDM-}dMStY7kpGk~CIg57PpGKh+5>9Ci$wR)#$&>AJ^HqA047WkrIof`gFsz~rr zY~$J%GYl_b*U-48lZ(ih74I6<554ojhT1x${lao#jq$O#(K6$F086Q1y&F9L=pa@r#fWFX__(QvVj zkk|ltlU`2L9^KZ#zq7x1f&P?jz-Gh&;ZL$#oVcH20~`pMla3{xWk*wKuzik@?M$TD zW}T=1f*PdDk{>PYm=@JikdrnyM`uC2A% zCit)QCIY80@$7beB?mH2T?`2}E_?<2s|0~p`Q0$M?VpL37G@y(sp>sT?Wxco*;taN z3_rHwYDpxvyqzBX7gN)C&(ZOx5Mqz@Z7ghRgJV>Tj$jgVUWkEDQt%?foP^tq>@nwc}{jbvx-I1XO_pR|p09Qk<++3arIGOW5h^ zR>_I18!`xs%@!y6=+)wy{F&MaUP8FT&PMyT=i5@@Rdc-ydg9R?xKEGXb8zI{^@DF; zI4^vxMB%zikkB5Kww;laRG-oudhJKcIW6WA8E``ki(k14W>b>q*$C-w9LCFv~Y`aZx@T|`syjdM@LstKV+54aAEYa2xeqUW%S6;5D!pE zd9vR)&L2!r@7#9Ws5lJm;f(iq*+}h`(MTF1w-|)|1slET3E;#;kbcqVa z!omDs$uf1iB_xOhFiLs@BnS@x=t`62H*~?OQp0AR@7rWB9)ciPP9+;O!p#MT;xrU= zGfM~G8znNQ7^U&BF|u6bhr}xT=#Jk=5~XbUy@D`ZsAQdXomG3=MP9OPSyBvH zba6?#T-s^*DGi5_xQLEwdueHBt$Zp}_X#9@x3{y4&r$r`BPTlB-bsN9x6oXPV*KB! 
zV6-z?l({>+E3`5;nur#N?_(3D)}U%#c;%}=Xsl6^dE`RRc~GE5SsXSF%^zSZ(+a3i z*e@=tdjhQ&;Qwy0@`?Jx#aZZ(OVWFwKw1FPWRD}V0mr#h5#&I|Wx0(~9Y|ycQa!3D zeHealK~vY~oopZ?un2xgL)>7*nTkk5W=eAQNnLYLzf**OnU%yuP_S%HMt4xuc)+s> zZUyIc<)JVRnBz^)~Syx;35^JoEU#r$YH=HlvR zh<|iT%$dO=pf7h7;90ubiI`BwVxNB#Q1 zr(gZoy|I_s;@&iJ3Z@ja{A2fyxcXtef=*P|^{YBs^%c9($hXq|JgOO8tP?XWc;4y- zSSOM+0}*bwcTeV5V$!)~f{u=@M=7Nv_!z9H+iG&ATHYI*nRZkr&PJm7wqd6O(^-0q*sN3zcJjWEgNYB_0|;J)^+)$FTyYgQ4%k zbNXn)G&}2W&8wke{UynLAS=hl!#xG!%z(Pvv<^S)g2xdo+H)xZa&?E5SvR zpi>15*88jDU)qBkq!bgqoE+JY-mazD4T0@zllQ=3h$VetyV?-`E9ZGRn zK`hxpgfX2}6pJgVa&)qAn3KcwH)#rS+=x7{EkmRtdCX3>y4+6rrF0T19E~W zQfV^fSaHVa{6bs|4)YVYq!`4s=nTl|DeUwnH8xm;qRT{5g-|M4-=h?9_pJk`2sXrr z4%8}mN7R(hEBlHVLuhC0P`sL6?rut}$!g^>=j0rasZ4k0lP02~?7FQIuCMfC^0LuD z)#J!NtqgQb!u$>3%32{Ua?9_1zH2VJBU^;I>4MR!B0n*n@H?xTCu;VTE8gzcbzYOV zlCpZSB1E!U@6L7iG^qGnfJ&rec&ma8uU&7eUgHyC3RX6Ta{;291_D$3KSp;)9=l8t z+qy5DfXj=JB^oO2uYz3K;R>`y+JO94)uKTyVLW4qeg0s;qm*pEB1U-gF%slpbR?OV zHZcs8zPg{?go(i}@u5#V;J9V_P>C3r?5be(47$7v%OuZKeDopaoR4b1Z(Wpqf6@z5 zK59094r{blL@AA}8`~k7dSiP<#p<^0`L zIh+l*`W11Se%0C9KEkar%aEGT_MdQN`ggnqYrJfso)v#`^moM)mHH*J9eu!X!K*+DV zL`Cs>$PT~UDNeY{8)bbg-1Y4o2md}7#{!$fMH<9hUtL(C^g?5Nn}MHt!sQ0f5A{gk z;7kE{O{4Ec30C}j&A1Bo!`cV7Wv<7$HI2+jy8kCXYMX=Ms(lU5d_ze0*?$rEt_07P$=w zf{2htyDJLxMi-$IhokUVluUX7H~Q)Wm)ncXWvaW#&7ZLol042czFxNB%%FluJda*U z=9cLXjL~+o>3z(-8Qis@Q%#w147_Qe6qhPS$7NJQU-rOUPU7l}gUa2v7}O+a_cIWn zm!{yX&i-#Cb>xu3tEvmtSt7-8p5z)iV?zTial-NU+mv9Z+2f>Ym*&#KkQL_a7?hR8 zi0h?dnV%YWPm&dlrR4=01X6f2GmwJNVLU|p$r1?Fa7wl60fD?@%|C;C=h)-%ZYWAT z2*q+!2jb*b7j@T6TAB|Hk}6&hGvhdbu_kf)XSce8Aa>~=OYwqW(KkDZsnP;5O#i{F zJHu3MM3u!G%l`NAoqAscw>`hX|tGu-XJ2L%@m zJRwna>((#o{Z4FY{Mvot#FM7|-E>HMGZlr05Y9Th-@~E5Q**HL5zXbh>)+w|RV=Ha z9JQ_8`ra_igptW!J%Upx+x6a{_l2T1Yq#_4T&sqXite{mQE_2^%V4&A=L=9FbWqS+ zNOgOPV!jOP_wdo^tWO_snmMi~^!NTMo*9>*sTHMgaXdow+3orA)JVZEmhWuVmdt$0 zFq?w3jT#=b1Gm$xRZD`-HLq@E8_Eb_-UV8!;G#JLO^b90Fm)Z;n-4THkyX9Hiau+N z*;tXmlOY$QbDNX6?DBy0a0RT8%t?{dU^~L*TqAO)y_6{9{7K#MKQ|q#-inhbxut-@ z)}b1b59RB!kC&T&NJC4e*>9dKNaY-_d zOrW{`E3AJnyw{B*&Ik3Wy4BQWG!ZGhRperav`98oVog)BmOpEz+5)KMMR~|a_`~+V zWj&eAFWeO%e8~GgNEJ=Mz>2*fHy&^^1y6tFb*SgXb(g0wjj6CHaxE1xl!5shea^nNBI0Iv5#Ev-h19^(ydi?`+=+5i_mT@XD)_)!!{4 zwSYYT7RP&_pwFxUFf21mpFVV!-UUN_wSnAsU}2zMBbhs&m`-35;VJ{zE1l%UA zy1XjE1T%-fFw@EwLivX|;<4{>UXDiDY#ae=lhL6{jKIm8h0l3$FKJoX^l!l9_xC?) 
z#pAhERwhcPGXHz=6w&H$22FSt;CoqkG~WPtmB3Oe^lnz&#H4_O zDBMPOn;CVwv@6AOHq&>(z+#&ncJ1>nF|%cipO!STLp;QT_j4CYYF2yY69_7NPlR<{ z9RCag$am!lewlIi@Nd)U^-eYkWT5nb`f?`)TjrTE_Gkfy@J)$vs}=C`L&w|xhKVIt z8X=cdzxkvgB}azCZot;a9(kMZ0A2zCcFNo@Mgc>Y4nI*33;0ytADvv{4zGTe&Z4JbD2e3V$npFy`_u@lwQPyLbLqwJZ*Y+e2 zayS$&1ZU1IYfRy+^!{MIaFjq{X4N%7d62DR%&PILw78)`2n~U+4VJ80&d(Cl%KRWF zg&|4zavCP2zS4QDCHwuxYV%&}U;=IKjn%7Es2SkX;g<-rDt(IAmpaYWCsidZFV~HA zpiEjai-GnpgFS9H=J~*ipQ3k(zzWO)e z5FLUNL9?Osjv*3`A61*r!XSh0HBl?>jr3T${AQ$6?Ups0!%;|UD^8{6Qe~sl#uUBw z;|AcTvVF7r{dtm=4v{Z}iUv9xEm~({;LI(S45~Y|q_xosyP+2m-b0DZL+0uCTQdaR z!yw@C@AF=n?F~2cX`}{BS^07G%dwfc|HF<miPi-Op z_7a7=?2P@)KU_4b$}(oWe%f5x4$xSn*2IYyS&(_}({?dFd@|T`T?8s_hmY%7snUR2 zCZ-&Vk~oct;|Dre?4=$MbZ5&NjzY#8cE~Zw=H724_9PzpsnO0NV#4Ve$zqJh1csoF< zuU-@9HZeRaIO6pC&3k;ledqEy(cS=48u@;wMIOa}xaJ}Q!>iZVXp=_;mCoK-QQwF8 zNm7=7pOp+ikXNzUCFVHGv*59$u%NU1mMMdM3$afzpG6W#MCZqNLl+q~1AD)`^+Km7PES_5hbL z*_&&ByaXzlDW!yLt^VYLa&O8o9FDI= zx;1jYu*^!b{e_%zI-#Xl(W8U0{9u5{p!#VA2H4lnm1mJ1a6s{E{b*@+`T3v))K#j` z>w?tS@QE?{sWd@7)@2(x*o>AJ!Ku_F*WrAI4JHUG=<6KOq2@exC1d*Bbd8!d^p(DF_wxjnhZUnx4y~| zNGm17uiSzvNFle(g0lg&$J&6-HZ&iUD*4MU(R2FviA7r#BuMS<(0|cI9@26$FUnYVh4QFgCW#z5BuKb``?GL#3!K!9+{yBnqZyHyv)AZMXhz zcB0)Tg`_#%*&-5vdS!Tzi4JuRW>_n+GPo^%->{!zaP9Iu*VJ(9JZ3Jo;P=KU%%R5Z z&{WmbN)yw-ulHGPz{|qT{NaRjG4H?IZ!Mf1J7L}l6%ZrjYX!6w78Mh8gq12k{JJzO z*?i^~H!BF(}WFMkWe+|QlrSa6>LH16}hTNu_INDVJl z1+tSAG(X+LqEy+0aG1$E?pzlo5y8ba!zj8w(hjaz^mU_##P2mrxUYi9wV+B%)HuSjPtoQ0mHjj9+tm=U>Qz0QF>Sd?7{Uf3#7Nc9F|TVjQ+i?r`|s5jEEQq zkypjQGYD9PE;EBT`Td6z=OY>}v?}$QrOtWu21ot5r)5(6UbNwtz1QL~O<^o<*ad|H zW7``8;}-Q>P0Oa!YQ8z7=b zUEnZfI;~@7-wZZia4-0tJc%7L^knB+FCxjWts|x=pU8Mk!UPHhIWCy-g zc_?5B+^UY8Z|~+p(6g=Pg2$k33o_4n!7#VpJ?n5dTxJ;Tepjv0meo_lcWB)>jdd_z z5TSGD7B65K37L*lXLg5z--OLQN}Mb~jn$BEQJo4Fg}ntBjHkOp;Nt;KqgY6}Gqs8h zs4!||)3B4*=Z6?Zy#ZPmHkV@10%QkVTN5xD-#)mq0q8WxOHPgqqLm&(*O}Xi?iiqB8hqblVI6lLw^nhJkK(lbFSPJJJ^pvI%x}!a9Hbh#SlmyBesZP zB9wc}EDdTnU!>sd^8AK3jZHDO1{w`b*a=V2%?K!-O|^B?;0@~*tFnm)s2}0D%NGDs z37o5xcyqi?oP3v>ED4!&ARZ3n)x7t8$)^bjQ{ z)9NMzyJJ&lrbKqMvjT-TB3$eh;p?PZL+`?rVO@QQ&s|L%8>|C`_j-;Q{9};U&;9cR zs-S;op;rJHL~i58&`S=+gRrjjzyfqgUDANKsQJ5$Ow;sRi0 zAdmbZrqpx0##NQN%`_VHIU$}tlS+F$$cd4WaNdYy%h;ve8mgR@+zi3XX#hsSYbt_BH&CI|k=TPQ>M=mML(? 
zcBhXBCd6zO4O?>oh!lrL$3-o+N2dqT9>VTom{yu_7F=C8%g%jz-i?i`d6UWF5HbsB z3GpIhv(gk*$S7g}ocotI@5-Ydj=F0qys}GRcC(f!4QA5&a)IK^`vJ6Xc`QyyqjB$< ze|}_{>QE&=eF6c6&$Q*4g#YHN8p%^lD9?u z8jo>30H)QRSh_@irU9Hn+_<(fZhpdCOj7^^_HYN&JV(|t(9_adq-WG#&2=WPE8D#8 ze5T{WNl}N9D-F$9q%r^=_Bws2(fHc?O2fY*WcCB^; z6laChAYq{5#pTg4?Vqo)#68&0m`H@7g8({#{+p6Qy5`Tu+#o?Up8#E`(d_?qV4cm3 z?EZ7*kWM530{)+U%72O}%>RdNl5*G*N9()PxKmXOWk0dSlaIp78A3oKqlTdsS|EeM zicW%-ERj*(@b}xz=G-RJo+U2Fp1prQ#uXprhzLq=o``2uZ=S-n2bs-W(FUuAmnBWJ zL?6*+E|hK1hP)jvvOR&dO1^vWC6>)tka)fh6u8Z@m6JIR8*^fS&ZV% zvRfsmi}4G*HAxfR81(dYl|H!XZ+qR(_@!yE&7AwIqo3PD>8TA!Zot zhed}5A>ww?bFqDxjUXt#MQ^-p9~h~W)#uCCoPt0*1lpQuKBl zUc&g&nXh7HkiYm0&+lY%4gdV5t%k;yhhoi3ZIIMJbrJJe;frFM>FRxCB4rC@lzMZ@ zH=U)vt*6gvxdO|Q$&?j{FODJN7ov=$sHXL=K?^2RL%`$s_WZmvGjl(8{Is=FzIEKc zmMouae8z^!IZ!qU$P!UAy@Y*@A>Hd6bAwh3eD3B7Yi*hAvE9w0866rpfkn@h=;_%L zXSRtaImls7-6yqpsPN!8J-UJx4{Kg0QwUFv$vn;lXGo~{<$yj?Md!Y2B~Abqr>Lq5 zP5@>U@MT&dx`%-Jqe?ekWO6g5m1?0s*TZGZAF)Ec6-7n>NTIxLhOZmr@MX_!nRaAX zC6F)brmUEX(Ao(75llWJDLcH0h=wEUVANxGJ=W~3NE!?CD6RkFm58Y`x9OJHF(aSz zsyOV$jja@9Q}!Ojq=jF|X2o21l7Z=(a)L)m1yqJuNqJr66UNYJ1`=sE>SV5fqa%s& zg#N>9BI9BTu%#c?YqPR+r@EORNFdZnPszHxs}Fjf6nXepzorPrgzc2`FPq)a9|`2N zF%~utjocb&%YfS~T8@%t&J!i<4k-eOKu067Gc6Z(b83yl(3wg;kh zDnU32$~8}nOnWAzVld&`ZH*C?eF2N}2t?9aXq2Ea>b68*E%P_gLBDXJ!lz`kpJj$d zp%MIYIxrJq8H?72R)vJ}F^qdQNWDoOK30%PEm1CWEkez_2Bv1Ror#cAi=a7f?Dl2> zU(7JasJ16KaE3BeXa`3#3-?^qX?wAK`+JpHlbX__(MMvB)NY=+BB|Az8v0$OC45e4 z4`?G9v2-3tlLwbUyc>@#rt*R>WL-RqYn@<|V|xdvoN;QpP=Q5zq(HwGV%X3*thQ!5 z$Bl#PJ^!k%?FWaiW=XDUu}0FMKfVzFrUaxLY+RSC<-3Si)9Su^{a)Um(1<f9_fg`{~bRQhg*TVW$XbW~d z0CWtNy!4v}xOe+MyJ$8oX;UT)Y@&38v>pX6mn~K3@{F|X=&T74YIyq< znq(RP-;=>i9#P4kKDkJu*mHd<5AYO`ko(2(bTsm{Bs8o(2vA za9rb1*?n{t4+d}@QSm-!Kd|KXY7IP2#-T&kWPMl}t*%o258<=k2CG@z1bIpKL!}QH z(cIBFK;;Ir(EXCLxnSnd1UE@)KN}uHI5!OK&$8HEsD@tVZ9`~4Y-fSTTTo;{LGXpq z351p}HN7=AEO<|-fh${}wLpjgAr@EwP^3V+R{|B_ALFcZc_rO5EFz*&BgKuqXWA}g zd(v#Cg>kp&O!fUh`*2ycBqm_qpI&4{{Tw|}C};>IoDi~UG6J9a`q?>s+0pnUTTD|K zsD((y5Uj&H0@wK2d`7`<&+m)Jonz-HBQS~Rp}3B1Z%dH;;Q&X3%l_;9So#ZqKlUHg zcG&=U)Yg}Um34mPfKVS24CP>Kzk!}HD7MYv80Kblad$N>Omuj9ghOme`Fj1OPdCa* z@bk$+rjgu}ZSXAesutYsG&?a;%$36H(s@wiLC~C6&()GYRoe+MO?NGZ@R>*nhX82+ z>8ol2@m?;{ptYGzKeXNP_pL49I-Q?sYciRu=Z;NFY=?6CndmT?h){#^pJWHte8 z4o%o6C3n`oQN`UIuA@r<6@x1r7^Dnv6(0^)qJk^lGBKt%n5A_9Rq>OG<{ecO4l)@X z^CFzEs#rk0vKOV_y7mMK5GD}=Z@atF5nf2dw{vn*v*_es81J>Y+cb^W0$j=Gd_hY2 zVPB|zhi>5wj;*%8CX5__Qo;1dn)0LK6wdOsAHNnf$c#7(?Vo&ct-pTG2K~JUy9_TJ z)fXqcVA+X=(_LhHPlv~=?rJ2G0@`=R7wSLXNz4#uM_6ALD0Fc_VQhPTcNqfImLcNJ zRB?G2>=0Ssw97&Q6v_(WI0}YC4ifNmgQOKQ`C#ns=NKVrXr!0`4|I6HZ6Pxmwd?sv zk3l~+e}De|Ek#TvRHj1l?}Hy-}c;!}!Q?IB`5v;z$dMG6x&%tg5??u)pL(rNT&JQ`wJm_V>d3 z4#UTZ-ZMNXQy?vWXvWT-pAT|ggyu-IM=Py}(M{3JsyoTglfJkw>AuS;(*bG77`g`G z>19@*t^vgN66irC8^Rtb%HIYnwoGS=m|GMTnjB%#v{QOOhw5gBE@nunKBPwF9zd;B z?;Uok07uU?qrvj0;M-RUPT!t0VUi(begydhL7O|Wou(!+(lEi|mEa(Wa#~N^9}zuJ zDYD9W$(*-a7Hr*zSEA$-&Dp(F&v^z&EHL(USMHsKaYXL2&o zq`85W*<+@w*$%142G((KR?yul3#6^*_tAJcP_F{)d~B^2*I~Am6p+c=M4Q>|#s+uZ zTyeS6S96h!JL28}H`ug~Us+c;4GYsCyzK5CXClrc!-GYgZNYh{=mIUO8r1e?9uO0y z&-=w#B+o!rAn^ndmX!b@QdRLL?Us2CC|1|AGE2WNlY#-C&P)Wvy?Q;atcp6lcN4*{r`EM;|X_l`YQp!co4Bl8%6*hBtG9&HTAL5+(bO8$my(Pn%8-vX37 z9T*oBE&Y)7UleY{RIoC`QHb95k(NF)5pDX+est-PLRMyp-b-F)+BdRkC+c6r<+oo$ zrbo4X^0cJ6Uw?`9nP-R5j;jU#jlyZ$w!X+(_SLp(cs%RdvKeTRRlcB|QrlIXr+bRWkS0b&8D&9RvJ4!kY$KVrD)05Imx3FD>3&->fg^%feY zICJ%T>Xx${i<)+0r)0LAUhduWKoHzGjfV!wma?ED^lq7LpNO`b(u8+muhWZ?9qM z>f@$%si2*cKfUifJH zY1QadFq<0A#d?0u95jtY_IXRD0DAJ_{bWb*TQV!%Ot%wy?vEL`pS+mfl}ri+?= zKuo{dW1Mf}?}n%Zqkmo*ofW$?{s6tTHsmpJVbzl?sX^?AsYooaADF3eho1IeRpA^X 
z5&X^#PhEv(*ce^J)2DCVzj~E)DG&|i;G2y=E#}dwe@F-ZV}IIJo2uP?O;K`&xxu$1 zDdefT{)AgS6-f&ej1SQjttC(_cDob-r5GPVRF#e#hMRGSN^Bz~lcMMIQ&JL443a>9 zCpjq!iG!rEk6Mj736T-heR3f|6La1xphLyMfgsP6{5Vrue6Rfc5CryRacDXkMVq1l zSL}W7e>0ws2&A@$SSf{^I11bw6ki#ieIO@CovC-&(#j90tzV;zjA2I1;%4tYW_}m0 zF*pAw5khKgMv&ViL~T<2`VYUgrEEBf2pP*gfJ7YZ}`Ibod=-ZU;3|?m6v-z zQC`@&h!i?J^Od?^s`egRB7{L02<)k{9xNB;z^R+y)c=P1ExX`2o{11jai*<|2RBo2 z(4ZUrN?~eSHJhT|Mu5=ACO5<;E#*>_RT|t>BL2hOzPu3s)!?W{;SOmbm-r1}H4smO ze^suT8xk6%_wM=YB#zbq=S4n_^0Gs!-Qhyx+;kxxB8dTYnMg0dRB$weHNm;Wm=8mp zW7@#MAwk-e9r{GEu8qf=2rXW*fT0#X9ab{2)d1lJ3f47zTz`vSY%klXKr_Pe?g_mL*hNVuHM4CGEncAZSH>^QSrH7 zgusv+d#^8IV0{S#YkwI8yZ=I&? z$0i`x>H|>Qm8tjz!VjTHatc-Oy)VKYlGJe%8r~O&&WlO@pggQX?t(M1Hp~et(*r^D zkTp#NWH0HScg3cR$_X`0sdmYM_pKwPrpYgkntps#=Jd==O9!M)i@G0lA7vE zLxjPRi=fM6rts=%va3^g1GeHG7Sk`hSA>@b@t-_bh~@W+pWeLXOZ5sP{G~Kfv*g}b zSqpoMN9^6Jf7=}@l!}j=TKT9$FcQpXJQE&hyP`lWd`XCCVqOrYgb;&a2~lDj1LT2T z=U{kWfd533OsR=-EI0iDZ<40S88?r}wr$?;^X&3Ic#Doe*+b2bvc)z=I!(%A=H^+k+SD)nfiX9uhgFuudH&#*l6cHUd7jQ3zrKK+hJ;01hUY^vX~GoS z5h+`HepOtoSK|px%XK;_#*;9TqwBdR?S^GqFG@>j_KRowg)&*KYbz%O z0VR=AIRRI36~he_gGbz|h+~X{7%k(v%D(v}G(AqNKXB6cqjSOOf0Tf#4F?Z@5}Bq4U6@HVp?XL;C^` zdhUR6Br&Z?_dLd#_ZVkk8-L7u3?s<}D?9A8P!mDbVW%C6sE`q6c0A#7UboI`atM>$ z*S0ApIqIS0D9Ph0n?JaZdSUYkW?A9JG)~Yf3->kO6iJqc#R8j730X$uBU z^7tQXsuZi5+gcCilu<;gt@`uKh70O%$-W%c-CycQ;jn!;I+~J1gt_CoC$40Kw#6Po01us9 zOi%th!6>8we-IBj;XqJA;)aq3$@1j$R~pP<6P!&%WP*EpbQuVu80ww}A5Q*u=%;t0 zPuYA%T1pDX_4lS`qw}kM0XWx$KMQ-#BVf<_?2GjN<&Y)00pg-DO;V+CjSdNwEd?;Z zMK@_d#|lq(x`4hlfdf@wO>mC@_dKEw4HN(^7ha!Tf3kg=T!}zJwd6qH5t_)BQ?WHt zLyW=5hKaU<;zmP2fg`yLsx+8?M>a}~*B8A1xNLk+u6=?^a-ty_f8KzHMi)3Nh)ATl zwqeUbX$Cw)C;*hVP1|)bhtj6FvR~Uku5N5>K+~f}r>E{f6F@kR-Cz&lq#r=Gf?^!{ zO7n#(e`?n`B@yR-Q?*jGd>y%8JM<y0$K`1 z!z19sRi-X{*p&`;bixTgF`PQP2LMGq5m1mElqAV%3nhnOV_D9y#UEe`t?;_;t9x^q zrTNUQ&2jp|%^M~=T5h2UaO^fcmdB?!f8Th_yNeMESVxV!c5{civunNG+Xq7ZG$0I# zG=s*rZXX0fL4@d57XK?4yC1=>D{$EY9hwz48)xtR2r>>;M06_o&0imy^GJOCaPS}FTZLgMc)-n~A1b@6?@T^lozMvtxwH@b%`1L?b| zD&I)C9pC<>iG4;%V*jXM)7o9qJfnG;=eC`=&x%_|2hyyDkF`6H#Ga3TXK=yhrRqPk z#k2HfuMh;x7$O-y$5BIQ^SY>2?-XGC|Jf*Gte2tW0TTf*ldwT24>&k73NK7$ZfA68 zATc#Imm!1#DSypb+lu5i5PkPo=rMVbrBYQ%1;fH#m?V(EvbzsShI!aa6P7H~VJ@)w z`<(LZB-8Eg8Qar=hK9DKBUS0t)wZP^Qm|BlNyu75<`is zOW+D@wIDUa(tUxF+ro~4j5LR?X zhT}mxL02k}PEefKrQ_7Z4H{w#5|@rGNL_l#fvQUfj+$XO17ii}a}BJOjv&1x@)5#C z^;Int=YPwX7R8T^`PZi%VhtiT` zu*x#d&K5YqW`tFoB>PctVK_TT=ZqCN6D}G?SOdezoxxB%SQ!hLF>eAhi-8ib#yU)4 zl{TNM0rdCt+JdG`oz)T)egljICD39JqXebMF~w;C`&!sB7L?rq;2#QM zxWE*4ggtRF2(4J9O*#WLT5)!mGpIYPH4b%f+ScIZ2oFC}9pVEwm7s#bhbqIH3@fX6 zRDU(Y1^Wn=D^quJ()I8D{%aNb*YE#Wb^Xir#r5jq`U=rO2*o#D|7LZyzPbFkx*7s- z82V-P>GS&+>#yO=2NZ4!-Of8~af!8|A;28>?AfsWt8ivRT)zqZufP3{ShLu~)#sSB zgo~RmU(WY}a~i;^8VM>*0&|rpAYcQ0ihq-ZlatV&B4h;i7?z#F(r(soHyO;`-N0-L z-fwS^8sysy_Um6?u0OtAU57L9_UbhB?^a*0!@r{T5L(BC%3?xw``GybLUj(IJHS+1 z6f5%yy+_*?p&L{q)xgc~V3^5wFk~Y^^gf06oX5P|B;GBTZQgU5;m8}{$9UfbE`KRS z1-r-P$hXGXAQZW7DWNjMk;mYN2;Bh|;z%|TTtuU%;NxPS#(A02EcW~77Z+=6bvE=a zuFIClVZQ6%-h8+o?tlJ#@n_e+SYLixT@G94^Zv*Fr~c)chgLdwWj^8*Of5F^KBR(W z6nWu@+mdo5R=}PQ%)Jf$59@d90Dt1YySn+{KP>;ezK9&L!S_zhJ=4&f+Cg(_$1X4% z#RPXgGNZS#(RHcOX$aP#^E{yOOKTC`G+PCa-quCNzkpU&n`nExZ~G_uqDn+NjqbL^ zgs$54$~?5RQB@`?l+(x?>x_Ig20!GDJHXs1X86w}Fg1x44fJujZsC#Z7Jpjmx+Nbp zUloDN;z`e_1?Op|VA(bW(~LOS0N=4&YOGqP zfr$~dWiPuMOzkIhpAi-lI)AR*tcwYqQ+#+%@!16ys!{XAolApn(?l?uLv(~kmW5RzFegBGe>N^I3%duC*t7P5dxG=PQCxmf?<;cIoaDp-@L6j7af4I+3Mjy?X8q3HFZAR@KgKsOO?til|za=?H z9;xH1S0`4zI(jQz=YLd{T6VP@IeP7!1m=`ut@X$VZR1F-Z84#9oUYm<%S~6Zm~O zHa^FnO`6Q&^Iqm_w{B$LI*UVPKD1+O85)$K@c9#YDgNoNMzOj(+1#bQ!{xkU5|beT 
zH;&ami6)BvKqu(7Ymx`m7&VAZXM}Ro12%~J+Q5gNeD8fTS|sXcHvUZ1Y-!wDdRn)* z!Oq49v3ndWKBq1@vs6ZmoP;FPXdbDmw`nB zEq{b1%y1%&h8H-|N+h%O#V^0nWDXC%ODGY}CV%^(YaA|!k}g>#?=Jq+JG-$&D%qWV zG2O1J{i%gAgbQ|_sf8AVQ`esQ?Is`3AEr;nTxueWNygEfAN4C`GF#0Dun~?hdWwTR zANd3u48GGMtir-cC+{pscH*VnHu-@l&V?A>+MoH*yewjcarG!~Yple;W5HrwicTVY$I>uFUi zKZX8f8P+?#-CFK^121qc)7dJU7TKJP#*&&~6T3I}aJ6YxOsnN4$3BtHMq`$4)_?cg z5lgGAjE=gdqqIHCRT!gM-|q<3^{3qR8IgobQIB}jXM!rgs4?G0+i5ki#l)mH^wSKqt0;yuNvTd3AkMfEjDk%SDwH zp*h24uVCGm`G-CTdSmS8zL}t@oqv9OVf6oDVV;e~&gis45?EZC^f)lm>omvtU!mrcG`D_bxSotA;p3 zV{96S)eq4kmPpG#4oH=P7k?Y$X45=?_B%jk(k$GpH$EcgOJ76H>k-iPIkc-9{mp$u z7%DI(I~7>WwID*8vGnh|4K>Bdg1v~^Tl!a|TcS``G;eWqMh`qvw4QAtW zaJbIKd;0P2>dj3bAX34kra@q7BZ09I2#hTxFkUe?ud+I@!pkij9^oy<0OzL#6so4| z@>cQ&Z>b|j=~H+MQfJS@TUZg=!&}WB!U%8ndFw~SpwSH#k_LS1pxO>3U@fE_dYHCO z5qXj}j{O@=X4W`EnSYO^SSbI+Fsc^W#m>Z2DQpiO%bQ zRNduzpsEJhKTocg4l3}LWcBtYFqFOi0eFZ**q^7b)Qd8xT#IA@Wb|I43waohf z3=~nKk0N75MfyX&`W#011A$pqhCA41^xA7X_$&}H&pwX4@j%CLGpja5IY2~QGRn`z zLt%-iE<3D1J+j)dJ6J(`5b3J)nZYDhg*0;IlK90&U^{2 z6!%pFSDXkC9w=a7^&-%cI3i+I;+n%HBlO`k2BN%=Tz}xBg)q>N`qdz_VIb@Bx-}g$ z2IdpqmQ^vp4|pOsI8j2R>xgi#{x2>lw_|qcCTxbQ&)A(_*^L z{0SM}E{e^%^?cs`-#|OREjr=}Zh_`-!-z1CZ8#{QLd0+Enr++U@QJXU2IdVODN2Du z#16RH7=ME+>owYN6d%+EjDzY>B}JAj9`P+C3pB&baMjkx6o(Ch%tCCaGXQw#T5Fmk zPIK*pnZrjz$th6MShHBn7oa7ItB^K!ad7KHh{Gl@ef~TQ@IBa=hD#%;@HPZh<9R!% zyn^_6t6{L=Tf`$@!>HZMdH;fUE96p z>``Puk<0is^<2@_o$I(u0v~sC%{ybNh}&P=toZz8Xuo~``gLf<{ZDw&|IaG>Q0VdF z_SF*qZ3Rq2v*kwz_Ldv*pIHy0xOWQ!zid(b`76~D9RsP+f(bnAVvTyD9T;AKAZaz_ zl7C6Rhm8tUWYxCtJ3t-}sRbw|Z2wf){`r1fGwhy*TTEuFIMN@Jv($e+-eZ-HW2WJD zj?ZeB$O)na#TtVIF~~U%MGlikixvk?K~X)Z^%i!nQlBDP502u4+5otQ2hqoi7E{F8 z;XGt5^UeT|8E3hqOPH z<&C=G^k`me^I7<}h%x9lq!_mGKKoQr_O{F+3R* zlGsK$B#$VEikJO+4Nhgktn*6nv~oxh1M7jdOf932mqSq4on?B?VOqx9(=UoF>wm9? zP{wwP2cw=`?J$m&rFmpy0leVHS33+!5XIPetDT0U&1y%%>cPI~XyHIJpbY1$cAgI4 z>D3O?+8?y_RXcDO0>fDIP~(KICl)cfUBonthA)~`L@TY|P4oGxQ_CDGIlPxAr}_uc z>1ds?5aN*ZB`kd{J6gBi1y1}I%mem}LYJZ10}}!;Fqbh~0u%!_HZhaY1u2*GO#)2; z4VOYr0$hLQ0gqT3r5qhCIaf@^X|NQUsfZs(xl}@hUm_L#cJK|4oL;;^Z{T6cW2R(! zF!R!HpGIIQ1cHRousAi-I7CSZtEJDTL6@2t6ZdgDc$;&f6%(*xubWi`S#E9D&YA{w z&VR;qSI^nwl0}K>b^iYJ@#CB&A(@T_Ek+P8I}d*~Tifj;$wj?@ScQFjs=C{h)h#`g zYqveH#~ggZCP^lxCX>6YZEevf*L+A*C7;EaFf}ZA1Hs(dS7fwqC}j!6Q|nr@C`xCd zx>Q_-Z{Ommj(X*=5*-iWsVj0>R=a)2{F-Ax2GAypzJgkTw2Vz0X1$Q_o5ofPK-#&O z!c>1%)3|GFVZTINRtp}wU>O+}DEDw%Tx_Dkn|(*0z-rwHdD=eplobUT0kw)yaswdK zDM(Clx19C9+cy+W0JkaIe!9xLvS0H;s-MWEo5;K8Zg1wfu0vu_)$PW~aDox0))s6-0vBrG}b#Opcqv0z%5 zF|Fb@C@B6#%)>`|IB&Au=H16XR`gikES6e?|JuBpB`O{%mYT#d7T2Ds&axacX#;;X znaHq)R9TbgHWh`cth|K&K*76o`ruMY=dz`lO*YF(f(vF~?T0vD|9)NBhdH1sVMe86 zx|=g7m{4MB@7N)wB}@6BduO}Nl9D+FS6cKuqkL^TdMYq?W6a_V3nkXRDnMv=xYW=> zDDo1EE%&N5y|+V1@yUfqxe(zAs1<+K^dr5?vx+{=wypp8hRD%R=Q(--DDdNo!D)bk zH;~bAYb)E7`Eaz2YO}AsSF?%D!u!x>O+M7)`Wma3MPLX6k^;I zj_mjenT?A|OyT--VxYUW%Q}1KsrYyo1MH!Nc1a8Uj1b@$5;8JJdyn&HTaMn&Ov@ot z4-kYAc$%Qz0R-1OOqK5!P{~d)k! 
zQZOX?Wf{AaMjSl5{6e9=p#Y$KPDAOPp|yfUm!TsSNK|j9rm^3t0hNFH8C1U#j~A8W zMeX(&f9(ANkBbKLMa6x=8DPonmb9q(fvMJh+uJJl3XZyP+|i-XWXR+#O}ZkG&_6>7 z)E&tP9S@n*;2}eLctAC+SLhH4ku|-=Lv7~NATHRJMc;UJ6Y4YJHfy^%5`5C(ZEaRe zt7BjJ2DNYH{eid1(7=Cv>ASg~sfi2Pskf8-fsIen(Z=87s@{;CHx{t_4bEr~eg096 z`_x@O$>lb?n`ZSdm&q8{3#wzbEd1veea1eJfFWO(81N%y)7E$P8r!0}IDoyFVC8B= zCRN>i5Bg#XdgAnZR%O7e(}G#m7}8izk6fCe)(eEg{l*rr=s16G_d~aZn(hm1+x*D@ z^61*2J@-EPigFzY__|#C&uQ4V?jH*HHrwsCIjxK!lT{^}fQH^cXv#GSwCu@jNwCz3yHm2hMVgz{AUmW~nCwAB%zz;M1Ce40JXaz9gDme&$-Y(QfsI17{IZZTY%~ID7)sj#VMuVZh2v`*IKK9&_94s{ z%Mv6!g+%+S)_C0KGE9SajKT#RlnIYPx)nBImm?@rMtDIeuNpKInUcG46*7b0H!~!?Y>a2NIFdA(Ro`DlNSoBL) z3zBUt0vI1jsq74oqaihneS zK@z55lg8P2eBz9!$Qk&UoB<^Z?BtVZSrkqu`uNfGfsa5Rm>{jYDjK<=u;(ZhjZkba z!%>?ZMW*RLYJss0u6tH2p$HXAf>}No_wA+Q$Zp3y-ZL|iV_n% z$)vEu#|q>p$6ULcoXU8L%2>0sbaTq;73v~v8EJ*0)_Q(K?{s&q{_YouB%y)S-w5Jy}coOBG4uC`14 z&)i;HLEI(1wT;r}M!FN}c7ZBF4z*<5tVmZ;LhmQy+(Bc1JmtaNVP+kJ7HA}MV?kzrC0XR8QhGh|0xptjg0O#jEmsP{ zOP3mkFx$N#0?pfZLb;NFAjyhXh5xu+1B--Ll4^IS@QW*^t<#S;{eVcd8MLC#Nb6$i zYJKs@5aSi%aRp603a8=HWkV%kVz+@wPdv0n><_K+&mcShw+pfA&K`}J1JRIR= zz!Y);;xHy*F7SLBjP*e8PJfo~PUzjRzIgXFEM=K6S$BKhSu_oIo~ODQZ&)lzFe3R^ zq2gp~x8+Wc^I1?01+Y5ygA3PK6rE&CT4muzxjYMosbS)|CyxBl%c?!fVc^AH4i5#~ zc5yTdc0=Ftz6avyBnTio2orUCv&N*?1>Oh@7%&zT0k)_(C5prGw|}}|^RW53c_@>I zaT)_*BG^_h_fuD*z}=cPrCFG#x!*k=ijk?8qzLSx#-XFnb&B&tKk*a59J4S6a*LOM zR7oIU&7?$VKZM-gl8&L%1MPvkyO~KEvYepveW*v&Y9%y|(@>Eup3yy%%!)wbScO@x z{1bm)lR^?I=*W&6w0}|1CK#wuiY$Si8OK=|nOL4_c{4F;A|34OZ>Top(?lxhiWZ-b zE9gnsvLKxXt#bykPQW}c6SyD#oom0$k6#<8_w3=kk&L9j`pS+JK^s=y(bRLs}H zieN|0j$aMCqO6?gGfKlu+oe8543|9XIMiwKKMNNWm*fkjr+*P}3(QupJYWleO~;`} z!G5c^=n*(WdWUvokB$P@@5rgY<6FeVqF`IQaj2gsv>(CQh+z%R5^Q?=%|oNJ<#R6) z6-`$s0wT}>J7m=9oy;&`0#qGpC;01RMtDnu{$|%Vz|QtP-aH2w5UN@jLK>PF2?U^@8U*PZ`j#v+XGcl@kID`ZsNQzot2rN1@7RD?1Mz z+UE2=Y+a8n0PxFksGJi=xJHcBaxd>9X?vmzH+p73EvB*FO^6OE1|;WpsERH37?h!d zsDKy5DiO@0100)va4Wn}>z{M0JXC7NigWv1GSalY8-K-m0C062FU_Z^zmgEpL?Sj+ z4SrX>6y?ZsYa!9`E)F1zgAW5nVGqAJ^MQBV{_PWwA6MwD#=(y-Wi=c}e3KFNV5jW6 zZPmF-hOEQE$?FYc3bw!#nLKpQ#H9i2H7O zUX9eQ`u9}L9K~=?HOm4<5~ZL=$P=f?Y0EQ(>>)^~ng+1NB$zbTvf$hA2TE0Vz%K(u~jOsG!4{xIBqXkBL`hNoF6P90Wz>O~+D7!)w%*FPi zH|sV$<+v@larItIkS|~8tP{BRNCc56V7x0Z(P*k}k0B&BK&FF(#=$n4?={&$)njcA!85(|@PPOY@jLaX|%ZoSfEW^&V~mVCp-vRM=L_jzXx_ z`$O}3(;vsWt^b1oa?1;N>1v1+PE0EDNP`cUvzg3i9gvmoi~3?u!CMGlDw*=zDQwL_ z7bX_J8E{Gj0Niyby*#6geTBEC01;roGGauOtx#4+a|KUrYg7bJK|0;J#(#g27&T`^ z{AXdLRK}UMz-SL!(c|1A{W;V=h*KJc~22~wm`!oLdBC|TBI zR1In@Wc#M+*^$un_A-0QI&esrX{ui-&Tjn(+f1I}OVb}C11@2q>(h}mwD#gTlu+_D zfTwf)OO*iqWqd@m$j}UM1K|U93T$l9;crzdrwPohov{l0J9Mn|zkdsDqA*MJ&;MP_ z-yGK>9TeP_?>>Rri4(&Kv}7-kip4)>yI2R|TL2P1tJt)tjJ7WS#8X7a?@(+lV&#zI zklyFQ%GWkPfJ`#etS2hPPG15z{%b!9U4()8D1SP*HRmCu)7EFthGGbe#n0`jkjkhE z(Tu^+tnrKlT7cy8SARBTrfvc-ATa)^DTZr4e2V8dLRIb-d{}YA5CtR2P&2(lyVq|6e6WKFy+$ zx_*Ck4>L^o^Lw>e!s>1(sPLpf7z=NSF$y^!i`Ei~%6w+s&gN->=S8O`(E?^~RvT* z43i9nz8G5e*ruCATM`D=lDnvXsnTaN;bMS9^7|)Y#HZD9g3rknZg8-F_Q|wrR&BXa zwZ_5OBo4E{5e7ctSGbo6zK~m)gcD0g3Hdqe==1|_?0|vq!{&&b?aJ*~)7DMX4_q?O z+p$es7ftMhABc}aD0?pB2n8O&=ychcxS+P(#0Boezu49T1+q> zfnX*t1c6ULxJr@(5*W2!9L>1^2!0&Eo`P=yxHAwO4=E-H5=2}kq*%ZZ=HaQ!s53&b(0H`OO<3PuCOpg_5d9|^p}!jTnq z`xw=A`Z?D&2(B-j=j;7{sEY;i{2Fwp5lR8$Gk0|-DisFF5AzFED$Iw+mK;?oQCV(Y z29-)T;)46sRZPPW7GLNthCf8;=q|3+Mwvi0lxrtrpu@>pJ%MXmPv0ZiMCIz`Wk+6S&bBqTo@ zKNg)kNo>#>qyaakdx!qb;g^sIj+I5_CLurWx;#{Xd1}DVULup>N4eSM| z^UmwAr+HFlAbMOZ;kOagMX4FL&|jHfT^kA}>$WV-`pQ7nlYPgk{&gzwAfr*;;(}8a z{s;ZI?jcpSm4#XBwwXE}3`FX7)agQ4P~=nM5X?kkgkk4@Ae!SME5Y?>>T#eq26WfA zxG_s>+xlx8@SFb5=Ed5K-I$Lq0`&MD#KxY@ZfM=Yn03fhZs~PSCl`=hWG+~5Rj#V2 
z>A^-ux!G@9mIqbkDF~ygy(nGO-wmt=WoeeimbbORq95|jGVL3wraP-K7uEU16s6he zg6)0!WuaAnO(-E4vhbcmNNd0RUOwdNt3ACuK$Od45b9{NaRX2{Q&zt+gN;7sFXn)l z6AoAVT?yM`is0sle3RyTGqb_zl$v{a=53mAx-)WvV+W1 zL-MNC;P4w;iZQKc(3q%onmc9B?*8oaQBp z!9S{o^#g(On+WQ`{UZZgIE$`NZCFETSe(uZD0ZWVwagaYA|GwF(%ukX6ZR~?z4*ToPRUUeezueqjgE=Gd zyb*d;cRYh8WGvV3P2E}kZZGqeG4no;(8HFGe;F(f z`1w@!M*sLgSGxV><}-GR1Fvj%(xDSj%aW)=7gBs7lJ>(rE7kG`qUjf7P&d8I^^;3Y9zIQNP~XL#|zinAODWz}fH1O3Ow9 zEZ(_kI+}iQ6OMxrCbx?L_$XZ7&WsG^C`{#gkZ^{V(1%Xw6}Aw{3_3L7)J*90d9O9p zJ|1c#%gfWL@2009is%pHdayiAeXNI)AMZyJIaxFms^%)%2GD0rxAU`De|GvkP}#+r zXt4&}Jer=M9QyhdH!2FRYD#0Y?0ESWdEsZFTQt{EmQf{W0CFCp<`Q%5%%f4(3mU$$cE3mXSkNWxc90C^#zmE30D4qjCY>1dBe*GWUq zbiyHyA+>wQ>}zBR9Iq8x)p*{xD$gdfG%$e2kBZ3!{JQp(sjrghDYqoAp|h~=qbLM+ zFI)gSs=S!aEu*q0asd_9#G$-ihI|doy9=Yc@A@NID)o7~2+mTgf3Ao2Eo$0b-98I_ z2C}*46N?DKDk(5efi+s3kVt|dytJGRor7+6GRf}%%7hv8cQ~~GkB1^>&XPD-*_{M= zE)1~PY8ZjKkFgP}Vu=lqfGzR})XE`=>80j?M00~sO3PD9WAFkvH0g+jM|~u*HvaZP z2{y2*6rTT)jH)9GOJE^pB(+mtq9kGtKW;Tx z>Rbg?X$F34nqf^aJ1)mDd`H1OOuS>qDFWvBT2{7TRZjFyazMKjo~io!#GWY25ggbt z{YR!Kt`XJYG-H4hj-71+ge@Itu(Bptv%E z1Monh{Bw${e{Zs53f*cg%thq!D8}!k|6mhV82Hszd6Ws|IU+f;G+LM_Wy#$cXudny zT+42>P^mi^WiEIO^7N7Jwa7q#1!MrZ1a9c(R(KYvFcC^SvD_F0mjYA28wXSGI6+EX zrUZ_%eb#HJ6}yX_O~*^&Ofb?KJy=$D2@&k+wPpu3e;#F`vn!5uOB4At;gUPZB-fH= z-aWgPUIgjak`jRHk33l7-hIe_50XHI85;FhwVGK*eh^M}gY&QDs*xWewX!@Aw+a|k zQ%O4WiaBx($@Stm_go8O+K-L&*-WU&If^xY6|&SW4go|lqe~KLR2qw%0I6PZk)_wi z@|oT-e@(10L!LGUM##WOthr)(7L}qj#bKtznTs!?2_#akg=k4XHrh_rtr>K=vqE1E zfae-yX0Hj(B1Sot#K>z#mV@yNPC*V{!6`Z4({wUtBuq4#fDjdDAkgFl1gy0sNwK^X zn8@X7z6^vd7^8~cLq-N{C zSY6qmz-oa)NqU7SadYG(C6)Te&ZQR3v{IV6&!n+)7Pe|55_<=<(0(~sFn#nqv+xDCehBdj3I zSxTaK<0wJi)+41m>Y=X4cL%xmH@gw3(Ec(I9uxY5;1UswW(sFnPp0D3isr;RK|PJV zO`46RP_D9OyWgc7Q4mrxTJbzmbpJN62XvwltEO7wps98uOXE-f1~@rLLD)fZ^?g^O@Nc{Nq{FM=te)6ZKYQ;-6Zql zXfj)R^bl`sYDCw1&!0Jo%j2)5zBxNqn)|qfQHXv~k$&qxU*P9)_Rc>aZC2WXFF+xZ zW$%B%q{*xXqET(+UZo7}$D~{i39o5N$1C+^=m1ZhJE6m^_(HfRu<|DXf5_jFl3J&D zd_Asy$7FsO$Fp2?8)s6yqj4vu{$(fyi9;#12TM3>WA!T;m1Q=)%0qwq-kD{h_Gho=-$8i6J@O9mEusC{C+hO+z?QQ#!Fh^&OJO8d zC1v!Qj?=(TlK8tn;6(TW`(~o?dCG<9Qz1a#%poOoevSyi(lWK+`swl0{{Y36x_+0T z+5-~;Fg2F}8v_&qF*ld-umUN6rC3Xk+sF~V`&Z~l1*2Z}n{<%FOb`zoY!(Z6ZrQ-# zP^}TY6shIIguLQnPVHfr+!yg*W;_|N;glNbaVeE4ei~-o1flsyWyPWT8quY zVZ&9*IL|jmD<+iPJnT3BO^(&mv3}X^1kaMOFCJ@${X99eTb^{M8yKm7H0j53D{OM4 zwE~uU>JCRoXN$+Gt_Iu`hz%|6I3t}F7d}_}NZbIIKxn_gF6=5gqjLTL zqieSRd-yjzXU7#&$`Hrxd8pc^sJA<5q(6wK@RpvYiq27|X$R_54YgzM*mkFtN*<1` zcS!xGcRHU7NeID=$ly|c*~xu*e>XNI-q_RjrA)14nUHZeNK0Cg6oHZFt{Mi{-1t}Q zL~fYUeEAsSRc3)#=$u^9pk%@j#fa@A2nEE2gM3mHF8H@WTTlZ}?83zPTvug*tiz*~ zP4Uq$MBwXdgB`5Md*Ax2%kkuhF}Fv0a-zq1pvU=wdl-7|xtMVelZT=oe^63fCq=WT zxqa2Sl4eyn1yswjJ;5XTBT*TSpk=ql<5P?Fpaf6a1GR(ehpKs^!H2OL+y^j~-o3p? 
zODWFByPt1A&?-t0+yC5dGoC#Be0zuEl$gT>&c z^~gt{Q??y!IeS^O%{~ykf3`rLDQ2XJHg$jZu|K==#L>cNY> zda#4)CSz2R$vh8F(R3bE(4Iicdxu&P20);r4p4Mi%v zNEzNcp%c8B&)>0wekPGErqZ0a`7P!4>X2L zn`?bV8CEl8)e0J{f&ph5WEwVC!CSgRQHHx3=k{JgIoUW&p`C`A$k9K8inwHXE|=DJ z`|Er19C8@&Sdg!G8=LekEmZ@#(FyM#J$ZDsvUGhW@;Py7$ zR8T`cce7wiPbz)|<8vm&4u%&OK|V1YB68n8$Um=W@Z_nM*{mm-gSuHRb*6%bX5vSf(Sk{p&(!Sie+a(YduO2jz8xtu_AKmT49(!U zy#gJ$3hhz^92P1$wl$%?B71<0zQ%Fte^3?(^6w-JB8SumboafK;hSc9E+9Yenpcq~o7t%ZbVJL4KwGy@5R+>9=R`5# z0&1F|89zYuHL^*#LLC7;PR zQePu1ApyPMQ5EcjaylFB#l&~fpnc`qlxZcte+jh^-n5uQI^rf|0_%Z0zgWoutkJ#7 z|3tC{hP&qqzkc7gD3LLh%fDt(WnJ{EX$0iksZk5uvk}(vd4TX^BICOr0^##E?VSt` z_kI1N@bi>c=WyI^(j58NZ*a@@0qf_|nLM+fhr++e-eI~^Hkzi&ZlRCAS za-(qkUsMMC#?B03l~k1buX&9@JOA+vHD~E1pY7Z*qEm*8;cptM4;lE`b?Sn-GuA9D zWh|D;2zHUN_~{r7aV^G`oy=201#6>We|=p}fh^*JB_V38iIOvQNdmn!V5ug{;2T>7 z-kvdkX}jM;ItCdnDEG>Ssj5^XIOjQiV6jugnE`2dE6x(1WxyO3kG_=dJX{?Lxow^p zwR^0>YRuxFTsKji`GwC(71(`KhQkFlQwjn7W6`fg<$}s2EoSLb+e9mD!4E6{GrfWt zAMiECX5ymi`+{bkk5w$u;jjt^LYUVW=KVKIM*xQ@qi7=1$9xs*bUq2I>QH0Sm!N_I z6n{w`(2!c%;F^B)r3hbnqLwbl9#ttM#@7MmC_LA$4+rMS&un`Zk`nBZ?PD^9WUho9 z;-k-%(u9Dn99qw(l+ny!$u;=$e$_KM{03Ea{a2v^(W+hN5Z==10R8&YNe~h%RUoZn z?1ySVdZ{JhAke0oK&s8vXK){WA0a7`9Dl#5F!79kG#P%GGG@^d(`>ZX^gufIsAZB4Q=b%vDvqPtdiDT*P&!r%;gtXbpG>cRPaip{ky%& z0ZANj*U*Z;0JdH-aFxJ^tE~EpzO`_I4Bz?YciKWe4CjMib_dWkExA9&Vk%Wr^+W`T z#M0PxZG#FZmtvTmtHxkiW$cO}q(r1&OMf#yrA%HryC5FicM;DNgf?8~fY1PEfaf;l zbiEaeP5*j$^MAgEBs!O&+5-~^-GgXh5(9*1*m}PmtWt#+v8OyE1t<0p@U`j+2?lmc5$6Aj_bTW zam{tPIyryEsY3XCjN`^yOE{PBSToig; zEtSy8%YW6$zb6##x(;aeDG3?X>V%L^oS(e^=sF2J{M~U04+G~`i{=j4qXPbIoOdUG zA+)hM>PteoH-?tq<15tPyH5K9yL{Vx4_GM!F_L*&pDm#HchtO+-uzKgq*P- zloA(c@u)|FfIa31h%;Yvh@MRKj+HWxJ5q4saex0&ayW>(yrZX8M&U}WN(V^QgI0mU zCOprnQfGYx90r2YBLn_8IJO6DU!f-$kzn~dP;11qW8dGCW zq}9l;X&qPDwrTxOU_IHuWFoQ`KYj3NGDZv|hQ%mj`gEcw^wX$jpTF|psL+DC`hWX` zLOEG5K|(+1lTD?$J}cm47)T2VXH$1n%$aY_g-k^O3)rkt_ioisi8tzPRMjc|*Yw3{ zjhe?es0X!9(pLAC&3Z^Y)*V!e1dATV2;;I9S{_xit96)b?ANJHqk!{~hpl{@3s!bQ zT|tc`nJ)x$XDig|z_z=Dx&pQd4}W+qq@xQ$!VMo~$M=a0{UhWAB0Nrx=LJW|8K}u4 zwUhUV?=!IE&tNM#b!SaAr(mVtD(YsXu5d~SYE4-sno_lFN^`w7?<3D8sJY=2=#us#=xR3 zg9v3~CJqEMl!`v~kH)eb#<6AM7TQu2H#>Ez>;IcCWtBD8xmvPW zdbiz_88k8ap{}o4V`5y37seCS(FK0%ia8gO5(83k57(kp)rZMh6Mx;z2dUwXQUl6{ z-$l)z_OiizHb@KCrbiI<#|RZ>7GZD%g}zIbfP~BsKn!*Xg}8N7G1>=Wn1|YpEKkqS zwa+`_+U<2@{z0UvFspHu84j>G!Hspf*_8SKgK)aAjyjX3c@e6rK_O}wX&HWRIwdtc-spX) zay@z6R+Li%J1>heOKkn#io~0!u165YlyyLS4x~?k3`CBj=zop`Z2WB`U5${aLuWta zraOB#-Pr~8{C&>uzBAP2BT$zIpx%EQ)cfxYwd={hjxf`UVt-(rzLHH|$@^F6fmd!O zuhjdmnD8@>uX^UPv#&irKVG);(BSz^|NMD7MRbpE@bz|dt-BF$k3Snv?RbmPHmU`C zwx2yS#`on6h<|QWwekC4O}T;jJ+o(roPAo^+#|XD;E^P~=KWfgb;N2d4^OM4hm1DW zE*)yIPJ(IM57;8e{?PiSUG4O6ram0J6VV*)HbN3p_nW0;0ULPA?JZ9mZ;TSL!D-*{V*QW!h?iTJ-PQf=?yUKXa~ zt~bT+Ne3GF{zJgwX*1}B)OU=76P4dH*?plNexqQXKk?C{dn;>*XvyotFMpN6jBk=d(h5A zI-owOuch|Uh(b8K{l}IwG?Zj@ysPmW1LGaAcOfa7*9%~kj5>B_A%P+_{%{yygsDY7 z9|^r&o%|2qcUO&W@EJYca3XIh0!gwbrdp8YmgOkVxI{Qml_PWrS`vj|oJ z5xuKkBY)Awo8`qH7aVqj8E}p`O9VIU!cfmv#l>&G2eUPN{C*ZNnZ&a@Lt4y09uHx) zo84aghg2JifeTZr`pXpJ%IPUoDEXsq*`)g^bj^!0qF% z$q%~+qkAwi(C{prvj9w^xMFcEPMrnFMP%$qr+*pq9WoV4&U7f5lF>;6NgxemXQGxF zhbQ+k?P-c1S_Y_FsMM^=W}7FJqhLTjI+mxE6OR;o%ut1bX^{Lkm~iSzfx~ABOSI4? 
zZUh{gL)^%K>5zBP`ssbS*1x3P6$JC^YudiM{O1p8{q*1{Ayy;3I&Eq0@2xOk(Y;K zbG<({@SvSc{ovp2w z8+v~H<->TLWC^h(q$QDxM>wU>Qjdi3!qZiR#9Au$o7OB{V$Gya4tJ*rD%o+a(@~4o&_=bjGDcn_zZ~!hQi3u;6NUl5SLobn<_2q zgZaESWdteG)fIit~>eS`~Zkl{} z?v=+SlY;la&ZqYuf8ySF0_SkulgVEK_n1VIp;W@+D6xj>a(H(v%Bw{fgnv&2R}{gr z3#|z!C2>cR*Tl8Vh;K!&kL+(G=P53%eQ?JmF@%xUF@+I+G1jVm8VZP`NQZwxM@)YY zBk)QB&?6ki)Kp-~sB=(Sp?j5_Hzq*onAs|=8xv{xVUJIf3@fDB+HG>uBmbjUT_oK% z4~NXBg$yC!;wMfeBn!a(mVfBsB(IK)tc^`lCD`ooNHGRU4qk-?K-iei?f3m=a2lwE zN<6k>uhu<)OC|iy8mdZ7(KcHfdNy0gOuucUU~$+f+P2im+)%x`6h(6WF}sJPTZB3S zgORYjMAe$CDzbGBa?>WG4_%hP`gE2ocq~|;M@Md6-CREN#$(8tmVdgRGcr*iuK=@~ zn1Q!_NzbVu2^0Y9@Ek)Z$r9O_>{wH|OWL{wi#jdrs>qt{-uT6;k=FhY<AXNiDOOA*qw4(@VA>8NOJHP$LbP>3_Ue^MS!N~5e1F=dTNf+_u=flHSM zCj$lYHgct$@?uXyjGkuEHxvm51w3+N&LS5gcTXY*gvCij;A|kVoi>atQ*3efuF9Jx zE9vEPYU=7&TYxxBWH5aVdR})Nf02x2bW&CDMc~q{O(LOf5`ST1>t+btrWwMxG;N@& zOoXcz5Q`mK`jNjpYt5f3^`d`AVrD5HOaWFwJW5ER0YicX2BAh6PU-nBH8=`ZkQN78 z<+ZyC>G?c_sh^JwsRktA8b`79F#!c#g)e+O@Xo=VA@?0K>gD(=pA;@wDAY++fkdg; zp(`GG8PU{ms(<1@wUfOtms-vZHUv8omXEwE_ID}JX2kL9(djT|PiEL{_&>Gkk5sD72z%xjPc z%$5ul`N`ahRWOwmX}L0RI51OQzQ$4pr>tLi(=G~_N`E>ZyoSTah5vvGUOo9yt~iF`Gm7}vA8u};8uJ7KeP7{-6pCP;W9`As z_SCd(3Ked=@YeoSm7*;&k6;YZ-03g_f=*aE`ZbZ?tx?(3lmZZViqQ& zlMjfnfe5(wd1_x;vU@HxpL~oDZNW3bPu77N+BtAC*T?XBqz$)JM6q1)V zBrq=>64hcWp3~t1#F)Kp6utgvc z7FB!AP-kmKHwDXW?k=W#>TXTPO~ky!=zoJ_X3)?jWC88soiaXc?P*m3<(xLSJ<`Vf zw5CrmzZF=pj!kL?WuP*p4dCg~N}*~D2Ur8+{~hb5$Ox7WG*&Gp!N@=E)|3udW%~IO z_Y*9F@LXrKZH@vvl#nsC#jLVl%Y8$i@rJ5CcJ~pmV=M$DP;GYUsv8nUxfVxE=zlKP z-oF3&b~yoAl^S*fZ$SjAF5++B{QJj`-+8%dmhfK1nI0q00>|&(OkI)&`V5t|?o#=K zpA*!hAnfNDfF^S#jgTt4Pm9AY!|OU*fizwyjj-D?YW$n=ujeu57=WG^pJ%JZu#x9m?~1B6qi) z`-6pY67&+W8d}eyN~~_6z#m6QNTmP|xA|uFC+>Tpo5ry|2k~e?JaUA1g2*BsSUALE zUV2szC;{DXa7Z+64k9Wtv8kT0yRI!{MMLpXo9b6U{1HtL__Xu0o>Nh3%YWE&D(#r- z_>nN+Zb!jru3eV&L)GVPS=%2q)zR*Z*}U`_6wm?WX>y)H!&p9*fp~w;qVetj6XDJs zn#$|ZP-O4Fgq~`wV0d{}ghH9pdK3`ImgKl_5PERgZT*WQNjz)COym%(=dDVJ}T0tkP>H&GjLggF6hqEtzacWYhijcv)vgX2oUBxs>b0yF^X@Ykn% zdT&myeXMU0=&W)?A-CbK6Cx}5<%u3)#B-JN|;toh`4Y#5i?XKnlZ_?~$4<&c_10PA?$W^G+O zFCH%>pXbZAsxKBY*7Gf%_nWrZZC)(z(L_PMES$3lRHM0ONg__k0^lOHdL(J4czvWo z%b8JWZ$P z{`PIw{PuS`T+@;*m-{Vk{{z5Jm$bimw}&G~F9J3eV1R!Vuyu$4LEWWqQ9)lrXA!QQ`>@gB)}kawQGy2 z%r*nWgj6T;zT7wY%5k}-^~;Z+Kfi&8+oH_smw)a?Dg~s{jO6y&7-_xd`roYJJTc&E z0&?I#gYylVB>@*+qzqxi3Fxn0uD<%9p-fmnoB4lZ#vzR?gkgyo1v`eftrI*RLn{rG z`Z94LVpk6ew7!tZJi{gJtY9(t^wNaMIO$N_govapisBBUEw+R^?Qk!v&3;=FxkqGy zj1i#4xnQa4FzrLMphoImAa=1(k(u8=#S^c_H$(u2tq(8SM|yUCLY@=D8A7wPGrY~b-DnjB4Nt# z4hr)_);P2eA%<)qSoh-KTX3ZYz|JH~R6)p>Qzp{hRiE3JT^`~~0s(Da zmrv_nzL&S*BE4fp;}R=#CuLiJeUo_(-Dbk)_Pdx@Yg{8|PZW}}5O@PMruJ*)aMXX9 zb3ag!iJ<@1zNQeHFzT3Uq<|>yL!&Ey!Ndqkq~&uXxLC99<{NDKyRuc4U=Dp|60U-UY#SHif&f}4LP+q&I0Zyzdav=v=s!ARsFl~8gtMM0_B1}!34 zWYokWAE1-kpg)PZHH;ou84O@kS8^hC36-!2T*wc)a1}spj!l3h1F;NyGhf%$cHk;l z&|tQV`oiTd>s!VKQP#*zO+8WgC3z_)-Z~Li#$RuHRhZ!+6zzz#xVN4ugKo-GFC z3A7N=RJoe_5tt4T4o2yLa?Hgre53~YubF&<^whAs*iobqf%kha*vY8&xo-msuTVo=+pbbV)^ zu4)*fwpqEvDB|1>WyJ~;97x%`4~Ak46<_uK@br^1!IG?}l8X1Q+~r?BKm zR^n1KJ(MA=SC4P3S6Y8Y^H1fFBZZjc$h&>f=6AcSZu0jZ>umRM`}yYXGRy_IRwDeW z86NNqQ#|%6RuXl6HdYGg#By*=vn(JPg6&$mdwR&rAms=Kq4;#in?h|cQ<9C9i;&xT z1b3^vZ1T%41IAV)Pa~EKutwaO3AzcK6V@4I^zJ;&*?6||1gwAfID<+DZ~ONVS%kpA z60dddiYMN%qT`YNY*urk;TKzeCJYm|Bqg}dv}g4s5Qc~dk|1b(=oghzZ!>;sJWJTP z-85AntXPDhM843)$P+*U_|WBd&{U!)`Tdw$q1ebozou2`v86nn!l~D(ijUPRRD<@^ z@afBUQ80f>CXIiZ5^VC8HZ~Q&TD0yq zp3(@25%J!kzY50|PsWcdmqZFpUF};U(c~L!`5-87cT?NJTdgqq&*v@Xy*PtQ7^ZFW z0*661R9O*MnAqr zmOLg|^6n<{|M%OQ*KY++xMV^Wd9pnuoM%icnG{O0T$agppZt<4rfz@R{`mTBVdk`= 
zU?q$)B6`=|Bhk&9?ahCGZ#c|m32-hsGdXwcq%4x%`R143vSbg7KPDLyMkh~>^qjzX zT*By-{Cx9o5^Z@LFAK(nT8dWx$M*G4*VFI~Nx0OeRLp13iG9!NkaqjCKZYB|U307oA zDJR0lTWOQ$j2B+&HzY}3PLgD0`YRXmjFX^_y`1b0CA#Hl^EX`AsqKlbKepX)teQQ2 z9@>u9tIOr|4D_~n^bO@6}p_ck!GUhMV5kY@DFytg(v)J zY2sWBo!!;FC+mM`_5)IYP1>#}nCfkr7s8Cr5b>y>Br@Wx?CH^P5A|@i9G+3*>aYNzixjuMlx-W)=L-uPxeoD&9O5I z$+(_GW&entlb5V4M=F0WYVBlT%}WLLBfAT(Sy8B^-6t;;(xA;ZLS^YscegrEx9{)n zU(rPUaaUgk8V^T^l*)gE9q+p8a=d?k_cJl7+~qyfM~EV%TSk$a+9A7U5mAbSdmvgM z!_e;QJxYI*7irTDH2CA%53~x=uNTqZFvk&cjRFHNbl#%x2q}W^;D>acg_8rV%CxI{ zU`Ug+D8r(N1Ut?~^AIK>T(mv$s6SS_0n!a6#fW^vF_5}o5R!sH&w>G5OKOw8nTH>4 zuuate3jw7`z3Bf?z4BsA2@ltS7^0~o9A(bRuS$O(&cYoj^TGBN%}}=M`pQx0bl(yM zT`&+VlE|bW)`T2Vn2ID3^b5MWl?Mo>q#?rhb5* zna+PkQ*WaZ=b)DiB;Uo@0Ex`kbw#a7G#8&*XT``3U-XTQ(BO~D8*v^lRQhN?pr_Xj zeR>0(T9??+>!Ii@y`C_!Db9@mTU#uOU)SxV$LFnZx>y3YoKV>wm0Z|Nsk^cSdwO3bR56j;S7 zpLA{S5aWo2!~g5IAQq$%lNs)=Ss4aS;_zF6sB$b-LI{EosA)~9E!-v~(M)Ata5?7( zfOx9D_S@SHfWq_-qFkN~rYUndd8*>XIb_G&%&BvLBU#M#XAC(GM~0B)V~iksn00?v zT&e&Vm@|boS=^o{RS5v&GINxI8AQsg5I(orxTcV`jY?*8-^I@5cxi)JzeY17jx^ORn_;`vpWGk zhSnn^lE#57HP(HT?y80cAszZ{M?d(X+E?eQ^V}|7>-g-iJE(aZl?jhI9i;15dv{gu zM+fR-*0p^F!afT@4J5F!?qKGXTsq{)9de}a-ZndI()g5=aXgt3i6%3;PuG8~0nz{J zt_6JxD2P9_9@&B|JZQjnQi!Ceq9JIAAt>S;e2+8FPlFKzDN2}@#@Si;m8+WOSnp4~bA<}L3HH5ru z92l3HJ71Q;!JGsHuGa1Gr*#;2#~ZfNFg*dWyU(F8&cD>KL6XV83a3iwd+2=t27BA@ zz*~9}buu6Ut8pEAyRT`D;uuG3o&5!JsEA6*^#!_%D$vS@>#2VMzoT!uh0?uA z&~o>8zvM2MnhcuQQzoE0FVhA*vK?r=pHil0MBibn^>h$Wum;?II*${WcX|kCEjZy@TwUd3q-*NhGo7S26H-Q!3QfaPI&x=0f69`tVofCILoL}b zql2&q{tk|Vw;wAcpuB&AJZJqYE<+DJ*xmKy9x_U&Y!G{xk%b-^Ne+QE5nM)`4^x=2 zK$Hi20+WL{&oL`MSx6@_k1ha9$uD(s*_1&(|CCV{{Oek{sWi?CB6$Ag>dT>fO!ye> zKtXOf0;uo*!qW)_iq2TcWu$!vq@F(3P2JfYe(Qev)4nmz~~9rnn{k>1m6Ore|;^B1=XAF&Hn-8Xt|RS3lo=XxdIjg zF*!GvA%p@cf6Z81Z{s!=e$THE`ckXB(~6t4*mQ>#&(h{ggeV47QeuqhAu9GoRn#lN(r$eRdau^PsPxtm z9GeE8f2)l69n7M^_ygEDIjPZa3Rc{-=WavnA(+WZs()5`>g)`Fh01n z3I)K4W)lRq#jdQ279Tz*Uwl3j+`V+}{)oV$JiVy0LJ-SO6VFl1ujA(f%elk z5=Z^WgsdD9W2fBYE=)he&BG4uVp5UJ2)FQE(TnNaTX*gUN)Oi>a1X7mX;I7Ae?U!% zWLdA|X~_^vgs}YyhwlO#&Iy%6cf4I<10`PZ{scr2H`ZCdV(AP3uEFlMtgq34+;83^ z{gY2=+&CN^lN6H;w_TRjhYt7ub80&zcV$(y&gBG=v&P4H1l*PLh)_HN3RRvJj+fBmVdaXtEQ zNjt_eVn$rM$Dkk8D%)O$FBIDseGoy|72Bnel!3C8XJ=((M2&&;a*+!slIvNlDK=A- zjdH+|d|4fzRp)}u5T2?21SEyDCL{$Ie$2bBXac%eM|XcLmh=&lXgy}9lf(66 zSuX%N``n~w%$!SlMg?3$`NKZ2fqukWlnZz4Oir^}TH7NEf1P{}>UvVXBnNKB ziu1`$e4JvJ@=3s4A}381E7?Ph6K+DQi5IBegozhjNRg{_64o zb7YZU;cH9zf5PDAZ^s&?7{DRb7}s9-c3XBqpHTRr{8>ITbv9WDsWB{}E_FA+ zE)=ZZ2DgvZTc>zgajU%T-&hPNX5DP5pBc$Q4P~<*fxK36RD(kicegje-K!H^tEle? 
zk-cqEm5m>`uu(6&Lvg|dn|k9z|K4MvDdzs5b{q&M1MYq3fA}-pt%!zzFffd|3WOb~ z0d_Z8v>~}6W8K%3s0KYYNRQ$W0|@&7~L`<*{ve0UL(MQGqRD-Ol) zo8ss$7MY96+^<9mWxXdANP$XNwMg?f2uJJZM@NkA8IDlGUAV}%c+$naBUX6_MYm{O za&wlxhglFedX4+-Fj(dOg&3#X6`_WzyWoAyyk8rE@B|}>u z1;Y5&Lm+!CwdjTSsYq0`3e*o=BGRcXc)?u*T zU%mUlgAKg=A>d3Uaq!6=-3N*pAz`%(-d_Eig!9>n$goI+zHr&kd7a%Jcg;C(DPtxQ z^_TFLO0b9Cc-Y0wFW(p2@_KHhu}{5E&m(2{?B<5vz5Mnzt$+Ooe?7dQzjVDm?rHg+8~*Yc ztrzT@B|HoCB@kv#u1O5>oZXk4B~rSV18HuZV_Kd-FiF5eTOsn|aBObMV*`J*vmYGa zmK80ktOf<@DgNuSsGDBfAve^IH0ZT14^3Vc>8@o~|2ksT>U=DYb+&O*Zt4HWpWeKA zf(*;NNUPJoe+~mofv5@xdi!DwVDIGk&kMUtW>{T9f57?M@{m<&Q&!HTdzS!^wHP3< z$+mfs)%5T+`{wHrAR~p}LftSX%U2}q%8Y?|#P z;3%Mc5D8$d^bl_uJ5R(#oi((`DXU}^e^e?=oV0t1t71{F&*D+Gmy#gNNLoHU2{ICD z)e29&)?}6j$|`vjm(H~J=@=LbGh zd6Urk!~8BQGAMNLBo#GSQetMd@dc|xTBSfg!z8M?n0>#cqj&l5NC)c%9!qI=UlwJ) zp{;gEIYZyTyyjgx{Bld5O+}4>f4M&C^Og}$S`zPGl8f0{{fmDcSSlYS)<>=?r*Iu_KS_GH`MSgA&pODD#Hj_(GbTXCF~ z4QEK!)3?s{d1ZSJmB@>>FqmD$&=ham?;dqWXABJmfIKn5#;I#tPqZ_}e?4*KKau@c zLs=EU4!aI^Sf$RL1$5$gY2u}#3$}p1U-XIn>A9sdg$yV2jKLeM1Vk`^`LKl?eP9cZ zf`@fl(95oE#NF)kp+c{#yH;sKFLHPKc5xkx+3L+=Nzc&Uc0|z;pHJQ~jczNz$)Gid zUKrvbCRyJVRr#rQ!&0#>e|Pl3^&zhwPo#DY_?j)UsM+ckabHMtYgNqYS)Xxv; z8n#`;Wj4#Fu{k{n(n(U25D)Zptd@^b_waP|4udbw`+4WI5aT=^f6i!8XvYnEgM$w+ zoE&JC*HhhWteNb(iCS7v<3aY?V4$L09mjy37Dy1w4puf9bf?+kW1IG z1jR?)$LlIR+%MlOf8Nq;>}O2O2{OV_e7!rmi5U*LZUm1*qAW6ds_kBBz&`G==|Aq$ zZi@DiQ9NR@AEW-K5TiAfHyBzWclKh2E)rZpQ+QFYMx9D}IP7v;`nW3xJhB5T4{6B3 z=VL*&fa4uc0WF-pafC|pJJQeGD`*RkcawfCrD0P1UrnAuM6tg4u--;0+I|66XT=xc z>l$IajuV%FmZ7V=1hi~{rhUBk7)+D6OPDjS1_uI|XzUG4!nkk=Fp9lH->t6x04_Ax z@t2|60}}!;IG16`0TTlKg>8&HlIB?)a?w;6B=fl*~go|5ozIZB1@T2CM2!=scOr`YV{W)Fk~jROvt<= zu-uf}vc6R!In1BntJ}HCOtRa97*U+Tc?>Em?{l)*H z;d50N%`?rBZa4<%?!GBUha<_r_*{_O9dVv%s}o?!jR!=2x|MmdBfz#eG}W?fJz7KK z?}%0|;eVi(L}Bb$6awUIvaFNyA|@_0t5kF-$ZupLsbHTD=RUR#Sb*s*?vAK??8q!O z8=9aH@&q{P7y$_%JkH+Bt?(aJoha6&li6GXsUi&}=gdL#9I;WnWz&Kut6RSesE<+E<(zRYF3V0ZF0<>h%YWc z&wqpq94VO`+j0d&X_F^4D*Qm;UyrSyfe!$%a|0m$%m@&_|JIocEH*X$cJ>6LxR{n4wPCwk|#61p%tDAWxZ3iJLfWfp`7@roI3Y0nR!VWLe02V0hA zy)XL21wZ4Ca@#wBRSAS)oD<%Hq{xRr1weP7@*z+m-p-?Sw+DfEdr)!K(yc&*<$rFw zKOS6U9^(7RE^^Is-W$e4lv)Wq^}>7;+XhgyOed-~Qy6nKmQ>E0>T?rv5J~{~`F})ak7sU5 zI%NEdGxVuEKXK@jiz^wo;v%1rQl{ffPv#<_G&%EELuMzz8bL0z1{}YP?l6 zqtS06NMi28Ah+xA;)0MiXU4uj684HD<_ePMG}oc@uSwZp=x6{=qvf7xILU%RbC7rmZ>2aJFy$^4&-()d2*F!Us8ADtM^QuV|OK7ux^tbbZmEQ+qP|X zY;^2or(@f;ZQFLbW7~F4&UweU<9=CV{exPy>Zv&u-GvETERHM~WBU}VN^{5Yl#PoL zYjhSH?fxVwtULP($G8pfdS8Rq^o12`-O?Ba9{~c=76Z1J{9Sb}<54ZasD+MrF`y-W z!el=F6z0ZWgR)tbr)i4~YTNe5CK4HVvV=xs#%{oNOEoZY=hH0rj?)|BI*`?7`MJ{- zmPK>Dm_0=7m<_5{GzTUkwcXXUcPIW!Pk}PUx(ARA#-2!+0@n-Z1zky*>IJxf&b5g2 z0rWx9@@enWeP!12{lkQm*;|ctEk{v4QZwz^W^F=c}ii zUv2fWW8Saf7pKHX>0TrRo7F~FNqxo>Fd{8oR0&$9!fJ-U1mUVZ#0bOr z9bdB%Xv{g6HG%&gaC>~_*7)u8-@pct$Ui#xXt&q=5E1H}@|@X3cwQ?D)qYPuREO!q zQJz-|td-S9K|8;i^PozO3wV|&1}a?MVBZMkkKComAa2bi;&ozuf*`jcL8+wF<_5Z8 zLW6KVJ3ND4nuP)MQ*6K3bIV(7+}`?(%bNtwR43T|F<82;Rwd z9#T2iM+ArD>_s~Yk31Y2&Gdyw)@=J=$Jsax%qC9$+!9Qauk#tJ@jnnh$|e|M^D2Ms zy?6&gGcD{zZnoOz!J-{dAL|$c=qvrVjkFM8oY73!dp(})?-;V}w5woD?M$4U9Zd~w z|M&JgrFInz9KgtxvV#SR3dYRJ@}H=0dJw<^w$pZ(0yhLu0-gjmfR9lAF&MdHo0BaX zi8S7(!eZ+AhZc3THal5j9XCl}(K(*6+YD|wS@42rovy21MAp`UVV2!tsH1AyTvXFh zSrsueSkL9Httn;(TTM)!#{?-`ma1xTW@DM+uUT?8ULRB7Va>IvbBHd-`c`S@NbIBzSb2 z+bJQU&p%0p#<}z8NW8%$Vt-bDfjbFWxB&6Yb?;g)TaC(V6JO zT1u*fK)NuL9Fg`C=mp`(gmYzD6zGz1hglN7o-IK($@SRWB05p^IHEO@lwF$Nt?BzaYra_Tq zC#D*SNd(p5F~})|3@IDo@!5KEQZ{mRsEr|~K;nYe1MD@F{ZS5IpYAEQ?m@cCh~b%P z#Dn?@YvZU6}y zye(mYW=uy4|B7&K6SC)VPH2gu5U1VYMmb=Tkfsl=cfh*jUGd}kvwY19X{BSj_Q$mn 
zf%n1u*2g{?NoqUNTu1CoEz#4N4KX8BDk8!>U|R&u7il;@sgP@D$6YX1|E$23nYMsy zqCZ~usuv=8W+=L# z7ts9)T^{?(ZI zhiLCMe+{i(RJ7hrcO-IeEI?KweH9T8tRiFHQ~jvnfn*%nvB`VTG$IL&B0P(EQMwi@ z{tJMa?WN3pqcuLSIY=J|F?r}&e153t$jJnxT;arSoL#tcio@DZ;TQvu0*&H1WeyL; z29-0{5@OU}8TiSvB(G%_;t<53;qL0W8{KCoH$uHP;7>b+cC3VNLANqoYh7qE@Xy}ic>&tE;4egeNq>nQkn zJ98*(H|#Ro5mPk|`Y*i-Xs(GNRj{}G^~;}M{B3%&dpnR|k&-`X)mOWq&*e$Kcv$J= zu38AWS=o1+m;6LtfM$D!>XqwE{0_^a!aMX(Se%=#v&&llVqubKq+M^TN8eYX zN(hvf`dx?)e?yErP7vK@EIY^{An;C)@JmTiNo4LoPB!E=TP!Z6xnWp(Z7 zWkA$2P{u$+;)UH3H7JRvb}xBKv4y@zDl7PT9P@Itoc&XK+aChO>tGAtl`JZx5!wv| zPR6-MY?VbvPQ2@~+P3huxqn>d*THDNH+p{|?znirZld9^T!w}aJ>V(pnX-&kKsgc7 z5viB5GMXor0ceV9*p+8oIv=QwvwZEdY((IG73}7BfeJ^wN@3&YzP~Q%?8X%4ZEbm| zs62w?WSCZFbTiAw9mpYtk})DpzMgqpk|L`i#C;{mC2T-6U217n`uCy$#;)GjT(+6f z$LVQO(v>xpFcixMeJxmN0De zS+NHv>ylfA!B3yW6eA-W2`AksDXw_AbJ7)A@#Xp^sPeCEzAxd%)*GZz!K#;xlLxU1 z^K>_)kN!6Yg5abEg%vgo|Fi}obzt-L(WsQqgp$;jscQZi^JO(p0 zP^pIL7!PxLQ8S#Ji$?0gFH{&eli_T4g$dz~kiE+s1Tkr+2<}F5OwKU`gQyOAlpzN6 zvHM!y6W$l`%~q`?%M|!20H(-%7nn#jB736~s56w!eV35a>QCo}y-$LsS$E=MP<~9v zp~md{00G@&vUX=x3BzNWFv&ejEXIUPcV4;CJxk`wQ@#}Qsc-pGn1!~cDA02Ug3wB# zpjtwJ&q{7GzSQm3=Xg?Hs|S<4FBrpKMWGHh`?a}f;ipF6%pL0sj6Jd{8mTq~iGAL* zKdjG9?1GIp$K}_#L4q|+c&Lz% zU|94;*|7`bT;TD=)Y2EI(6T?#|NRnxF>-Mw)TyI_F>?O58v(?&{=GJO(*cB-L*&DD z({^%e>oB=g7L-mP40#SGriNjK28b8hJd!Gtm8~L#zg=Yg)8ZE!V~=IPaO2E;a=P&1 z$iC|8y56{Qba8kPcZ_kupd#yBg2ea1DDPdMHXvr)q`u*wMq7~8_5JnX`5-RgbMD6? zFfTZf-$j=a>I}T`bh*iaMNr%VTNP*ALG2@)-Sl+3xoP+&C){exm@~t4@kF%Wk*etS zE-b&<-?*XJeH%Q6;8sL-=7DBv?UM1v%fjH3qOTpb7{n&?h#{3Vd+n2#`mv_LuHX}r z`s~S4EhqF-3}iV=cRWZ>j`UIK7w1R!xD-V)M67*bH z;|(d#s1t-R7&1OQ3-Tc?6$li?Dl3tET;CVcx$MH{d?LSk2x$+~Q(JuGXd{#paPe$R zd0c2$Sw#qc%k6hOS@+Wv;ZQH+2-d%-0UCa*x*pon7^CdQ-JKI0LkGIhrV6&KGjdRd z4lZIfzknZ(cp5!lK?n>d6u)W^c|70uX}*>e?mM3ype3UJfX7c#k%!^sYqef$kq%yy zIrg|BTWSHiC{M>?58gz4)NrkyiAJkKx~Zf+DHl#_t17;y5X3d7@+AHu zEX14aljU0yl{iU=FL~0a6bHe+z_*dNCxuwUG~j{5%XN#-OrfCaY4)ZFlc)OcgXa|) zSTcsn=*pTH&M)Qj1?K9Ac@MOD0yQSSYN__I+s6C>M1fC6qc%@yU*F;=%wCa@{Jpxd zamW?!r=f$)00gie`s^_iE;HpKzbv@6$bciN^mn$ZWz*)GpRlC%_#V{QdAJbLn5N5E z%fS068`1%9YPL!Vy7`%aSMLXL`1C+STb3$#t4@> zFrK_SF|5hcH8cW~1+hTO4yf+WLDO017y)FXItP8?OT162<01O&gk)V|r37SNMe9Zz zp;i~|ICqhuTq7DvJ^9X|Lda6V3Q%nqe_%8cZMZh~1QmC>0~rpS?2w=WB|X^aEA_;0 zD57Qtg5&F@VpcxP*uU4o@lk@QMIK5EJ6j>+d>1VtFdvijM?$3LG>q*{iGSP`r{(E! zZP38d6VV1OSlt|rws&Y8ibeE>J;dtKnpKIe6K2391r?cR`7x67Wh8H*P$8ZL?4DdAAPB*+QD*eGcr*WNYEyEC-y>)N3-st{rN`V`0CPw+9B^*TyBfhp< zN-S|okx!ANGG8pRL*Yk8e6KepZvt1?sSC7Z4WWoYcAGr-_Q#9-!jd@9G)kC%1j~iQ zt4Xq3SPAZ?l2HxXq{~L7XUb4&-SQ%0xyxp_Gf<2{VT3wFS1?JB>WFVM0JZVg?xx8+ z1``kU(6lF-u_@HRi%ig>-&%tu14d!qp~|syY>OwWST%P$i45G6+ke}TN&=-0Ly&CQ zgb>=IECSIJDq9hQ`a(%uWm>sldEOYQ*gFHNxMF~m`yL=e3!0VcjjXQ!=2(DdpEFia3F~x zQrw4`)$=*cBmD$gRwQCaV|UXKR>ZkvWo#$Gv@!=5RM3JP>v<@LWwtQEp!mVUB05yc zf&eHVb&@2&=^Qi1pri4+0n&J3j1g1h+oQ+656= zBwf~{m?Lt4Ng$aOh;kl;`B9O*`99AUxbPDLvnUF$hH;Ve1ZU5@vTJ+m=14@uV{i&2 z+F4$2WEM`$6_zQk!ycgJJo2&-AYlq}rqS^fb;nE4vh2^v3%S+&Oa==@8)$03d6P5G zU~B25xXTXm3~Pp{c_$y;;55bPlpq4EygLz-hWZTjBK$gC`m`r-1!6L2!6&Cf`*e%I zV}23lGO|G!$_bJ92jgEDZb>?ow=Q*6{D^k6<>xx9dYZIYRel8*R-KU5nm`rj#O@+f zC6~hJ=g-Bho#mt?d((&0q-{`*EG4d7-ga!7mz0qUmmmBcBZ0m=kkTHT0 zW*xQel*x%J;yWwKDd*6ys}rJbqcdAwwLK6*Ezhkenw`!v%s&@vjmDcb3w#Lc#1_Sn zY#j}-r2f$ulG@QX_+`YA#Y=r7JeBTY7iav5Hk`=ur@je^?CQ7;P$nrhxnO(1m@X=! 
zZRf5WConY+XH(&pKQS{k1#^hym05Kl7*|hgv~sw!N!uL%rt+MZGC3B?4A)ae2FgA_ z+|0ei!t{HKyMCw1(pu!1X*X&>)D*LKK{YR9?77|&;4QiPch%MV)6W&2_l;%RBT4jq>rc?)D2Khr`I6}ZgodJa5`>W^tOFd+u z!@CIud)$&yf&82h()1R0?9f~Z97c({v>iD7oL-pN@J-2LKxz&+`7jZT6}dnOh}JnM zHuNtBFhPf;L@u;ldPC3k2S?bVBzh^z`B$XWOw(Z`tT<&S69^<3NB5zU8U8T;I;{@T zKiVfKKjN2*a7L9=w6_~QgaJqV^l-zfaK>n`le=HY!1K*lQmTQ(rWvSVwKJ5U^wJi` zQmuAXks^NjCPl4v)l>spyP`pCWi<|TZH`h+k)^|5fLyCFuQDoDH=IHg9J7lm$grWW z3Q)+~FY0&c4IDM$im|R@q*lYw@z}`_+hdcx=y3VQwz=!X!pp#6A!bGtLTUD^ssqBH z_H^((;3`>3G^bIksCF?#pIT}Fn0N~DMq_H71qVnf#i6fX$zVGH8H9_5h?S623lo1M zXA$aX9-IiOZZWMtmjQ3p&`-7>w(Co@rK~xat<{i_sNN;R{6eDP={Lb`$xbOq>_MIs zgj}GQePNJ~M&PwWtvrC@diWdO(ZZOZY}ut3V9;h|>qDP!1HM`oc$r8G{M$OkGB^CS z#3k~eI%dr;y5y}OnIFmhF@`TSz^@Uuf_{m3PV(ok)615vP5SQF(^^M)y|!JV9|F>8 zIAX2d8ZSmZE)st)8oE1td)iN6)xOSh zDslum-^@O|TAlG?uo;i(hCSS(LUzzjJY$Y*6^^n5jFfl`NXw8Tu}dZcT*PG0(1DG zzc`tf>l4~Kj319=-MjP7{qpu9g6D7U`3j@Rx|=kWJ*E|TFx)p4c_u%gw9scpkq@(N zb-4|6x#wCZYHF&Nk1rfm@;S4A<+Wa$yjQnrc6Yy>G=F`LgjDEbPW)b85vZwt15&c`10{O#og-ye7OqI zzpU0^`dW%&frnC})sd7PXCk>GoDdb06D%f-!54LKdRlhxTMCGoL5oGg$4on7PbU2hvL&8)z zH%EQ(R_8xML18hF5@w7sZJ>-qu!nZHG#gbS{!;Ko9DpJKCBoRj2<7HrKS34QqQ!10 z1vLTYAd9RhRuDFVMgz^|RyPkmq`8@9f~q{SdUNJE+4*H*DGwmXwV-7D;vEna;*`BtOE1PP2q9 zGVqam;7rDbfw^VD8Cj&oas7@WHm1iIr;#Yd=XaI^5jB;=m&8Tgv;XvxmR9nTtfwFs z;=sm`*cWpdW#UNF%)o*X_kAka(&l3ug(EE%x4Wbg$)K@lCply0|q8j5I7Aq z*T3lfr!m1`|7^d!INvx@N~r-yLn}edP!pTQD@8T0*;nkxsO1;ffL#OuuDzt>@6FB zb;-C7@??5edDpfL^JPn{Ri%MT{mD*FBAs&@E|r%`BTm-0jJpI{LYo}b zW^_vd;o4pLU$l?y;(w!k%K#@(=oI4>06Q?Z5^6q?(=l^`a819{CnkVRw;PJlh#NW@ z!G!=spx>|s_Lu(qj#x$mi8OKlpf2lOUD9ZsxlVyY)`CX?uz`I-!KW^jgOC4>50 zEBu0HXhYOW-*3TG=upS;qvckXfEVD6fiG_H5Bo?qH zr5OVVgWL&1L4=8A$zZ@i3tv%0gteec3KtfTy*|B>J3iWuL-A^CVEP#aH70I}fxviG z$G3qzH7pGZA}u&GDiVy+YY3f&V}Wk<#&~7mcb1KJZS}!l*i|Y&lg`Xk!SOPgxXb@( zF*`~GcP`iEy0*%m&q6KohwukllfGz8I6z^B|1;>w#`=>)i;^X2#o!50XQvlcOf$Gc z-|?1fa9;F?;wpFo?@&;*4=1(%L?C~kKP3qV#{r<^_lvVK7KnoPC+i6fFS@DNYzM(m zn-x*Wa}Mtu6!ma@wkn(feQ!f3e5D#HH{LFIXs>VLwpt$T!2||B;iZ5h5as4p$hetC zl$XxFTo|#@EU`;CMva(c3<$PPTFp~^W`eLUj6z%Y8RLr=)nzHq=87}vN_$is6-6t? zi(R~bb(1f}BT>}@4ucf3<;SD+fp2FA6KQacO2KE};Yo!a0`g>td|CPX>? 
zzF1wv0!12)Z#oOm8g`!?G5Wf5JaC??#O}NC576US$XN;D52O~l8MSD)(<%s8H?$ww zu5^mnL@a}<`QVbgr5pay8nX$jAP_lLb{S$Fe{n#UE=2TqsS$yXZ2ddvWQ{m@?xGEb zFk!^{A`Kw5@ojHo~KwJASKevoXk!RCc9$%g}=**!@J^>z1R`GBDxboMoS0 zYf^KfAvM|J(C``zIltamSXq~hz~mW)XtB9JGKL;eqj6#iz6>jHQUT8MNknZ`<<8Bypqza67M{u?7q3 zn*)!v>~iT>rz4Mp=C6O(#_so~yS*Ckz#D^yug zHb~=xEH7HZRJR#0va}**y#7Nazy4N~+ry1{Pn-tPRh*Cu+y_-mNG_b6b%tdMeoyRx zFDU7@+h`~kR*oA(GST`=G-`Xky(5jbL|XaN>3X~KmzAavKnxNQkB8`p(gNz@!e|C| zYS*t$Ki>eTBgl(m>5w0+c#51&>p+Bd^SwBMhNhUSbfr{@jRE~qC{Wl&Rzf^(X3P{R z0S)pXg)g_UDET@!si0|%=#)TOStBPP(Eo?njACT4dt(w82hnyNYb39*L-C1l!B)2E zssFQKn&TQflFyUIbrFj)g}(!dWV@XC zKI_s1hYVW3*BYiFk@ z9KftD-OLXka)YEFggo)Gjwq!~(8+5JTVMNqFu#8N$Sfm8;9sp;6RcB|#Ip5oG4DZY z&ErM}MKKKCwwo^=7vNK+^_X-f?g=-dBHi=ePdZmoDVjL+$7OLYP>-Cbb%|W#$||GG zTzp%j6KY;?QM0pbynx>+2*B|~ySL{g&H`FsV0(-+{>6v+TbN~O+$NI8tommzn95^b zN4rnGnj6^E56&B;aSW>LJqIIbGoG3|?*BltO-ZRF@4jkOwGU>12obE!j2f&z5HymF zCia~OH|ov7>Oz^sFf>Rs(srS`TT!c{FKJK=c3mEluH?$e%vs35+}bL9cq6d5%>?3~ zIJRxgUnAn*V$N9bj*KleSi3{L5BLE@S;FO&BXrK&_i5uCpHY-r3yC)fIi&~%br?PB z);Nw1y8ryBvO#RFnFxneQ@`SnyIAOH zn!LTWv$|;0ZwcOapa1qrRr{5ZPSD_~bwz@kLo7RQ1axtqZxC8e zr!RuAeYKkvp4JZvP{&{bU9u&Ik0`>T@Ye9LkwKk{#kQdAA1f_cGDwCDyyrZ(Z9lVp z?xrj;nqSZJa**#}>?2IRQ&X2UWwpG;{zs8$q75P~RF$C{8Af3otBlf_r#Gw8cv||+ z$0RCMhT)?O<6nT&#AwMm1VWUxr77%u?M}VN9}<9iA1=_g8f<4izR~0agJT6-xotam z?n@F%D>r9U$Aq1@7J`frPmFo}z-hQFX@{q6gMdyVbh)G37>44YM|1V|Y?{AHrX)gfp5 zUG49^^@@1@x^kS|UVv;IWW4CeQl|VIKbF!mslw8gU}{|_ASnnM(p$c){eWHZJbla9nn|(6XR>OU0j; z<=;=$bMMFCs=RLs0LIl|Tx1NNZn|hIEv^1+bm%9hfAhXs3*x<-D;0x$2n8Y>sM0O- zgw)EnU^{Ih5$(lN1q}E~-RK>#dmFKVo+^pVUH65tsTd+2#SKc}U?Ilkz31ClfNbzq zP%%5GxAv9eURcjRu_IbE%h0sg8Dnp4nO4_Z8q`X0$~V!80*%7=0n;q1H~Ynn^8O~P zYX6vPZ2iZ=$VkUXtIj^y88+t-97*8h!Rw&Ym=5bi=8HS(_Swc7F{lUL{zloJbCYK2 zF*D2dqFWfOI0!1*w<7cqKGWj=d`bNoBMW{tUZmZ* zG+dH8wELANLFeCO*P^yN5cf%^SE z;Ye5>kEVFDBZ>H>_&Mo&%2!Nr!owSPD2|+*uo_3F?FVykhM)uoPRlE0 zNhl+@dTWv^8I+Y#_`}fY`WPmj={#zrEL!|WB za!zW9g7B$|9)n2LPD0SElDlsj7Dzp^8>S-hQwGrJABhY@Ssj6JtB)0sM?&1aR*0rK z^6^8TOn+CPLKYiis9?FLw>e}(o#La$f>Vm)ClH5IT4w?7tkOp=<}y9c*e(Nt{*_6_ z&ToiJf*y%=pxxdfQ%*G8fvr^{mzy+q9z}5w6x|vz1VoWhg^$w@N%%L4*>Ok(C~jx% z)QJW>Vld~45l2(jgabG($PvWFeFOYDNV(fv6_0)JgX&;{hGa_Fv(qi}W#O7%g7}oI zA~0vKD`5aCvf@LXOBXEN>qE6b7W@g-A=Xa1nRip&TGFQzch_pega_xK=_=%(Xk9i` zm%U;LPp*-3=ic<&9z9DtY~s%xIM71QbCNzU#Y%;PBYrwUktYG79?pF62VWrpcL*hG zuB2$_STi5ZptzyMeTjCPR_*$Hw?{{aE7Hwf^jc?BkO9bjn$)0lXoLf>R|^yhQhxOS zIR;GgF95p1JO2MG-8Z6u|1I6jDL2ai>J+yN01I{>IS9aO;9rBx&9-rGz>)d7Kuxka z=1d|~%D@GH69{555-%kWXo~h<`aGZF!LQsoi11@c`n$bs``^Y@Bdp!DFUj8UsVH1> zsD;hC!hxNzHF4U`>6(Qc@o*5f^%OscTDuiJ74;sL)llns{;u2fAvOC>`8wVcMATOr z6|z}Haiv9f**IU+qovA0PL{^|{`l}Oe`SOeQ}=I3v2nK; z_>~t*zG!SMslPpOE~$HzpK`e6(!uoGnUTu$p*Rl5nY}UF(asP=SKEhS&NA>rGJ)b= zQiHT&FFW{m+fK_7>#yzo*LPH2@K2)b-h3{iq{Ur~7J4q3LfoDd{3`$kC}IlR6@UqJ zCdK>;kO^d=W@kd}<#NJif+cQe4J2Mc_OIaVRSQrFGg}T%2v?aoR30=8NBp@wg2;j` zGWQ1U381E!xx+XQgy5a^x$5 zp(#s_+qK zd)NRoKQ2D(b+%Bhpt5g~CfIm~ZnZVvNXKXAFQ}Z`?r=HauYk7gn7&uF8=P3IN2wIs zb6oZ;2rMON6XzGr_9$S-JphAt~4M9nX)qw}k zTW+EEeU(G6ytbx@Ui|g0LaixlJ4I~bO0mKcGL)>w?ryw z3Q#S)=R{i}VX2LzD+SA#oqB^4)*us|7ShmOsz<9XS{m8Ihea`A64!zeM(!2?ILKkk zpOW)DFFY)AqzB^U1#$INVqbu+?%@eHC+K_9&Pz#nsW4x6ggiN9-Di`1JJ7}wF^JMG zC;k4X#@3>9PSQoFid zEgYJ4C5#k7o_E~Ap2sOEgqnhW9rupBgR4+2OOL;1eqcGICyCqf?DSw4G8(&78wqkH z5bNimoLodz%MmG#$4GngP-F$+v=h)>pL>`^6Z+KbDQMVU0ix^bG$4T~2ga2KOGOP- zglHa5#A9X-1;WI0kXXh#TjR>vNxRRj759Dg-uLBg_kq+MeXmmn?NyrOU$qZVP^vO5 z_OZ{(PcHUc2lw@qSpQfnW#bM&F5WH5_8fS*#@7+}Ocsr$=H7MlqxoWPs_y!U zm#p?|(Ox)bh7&M~N?CHru|=|oRvQ@6IKc9pF^6SM)Ya9~gFN1z@|}|B0KJr7v%SI%F};{U7J=y`b$;%K+$5hIJw|rc+1Tj; 
zkH2&6n88{EDG#3(>}Ou|M$cJ%|5WxfhK{a&i>||cjwWCM{7sAE4+Jx$Q#E-{4?~ME zmbtCOzwm75eM1aL&kTYwO(|FfLk49^xnToG2jgO9|36YF5bIz38vCtJw||VPRbZb` ztRKf_`QHrpk)$c5>?xUe={YphI^sC(`1G6ovzDLm*&^2aqzvG*7WoSIp!nWU?$?Lm zZ#PRDOH&-u;KWAM;imR2@PXTt$CRNigC>dKZW-H0iYyJU50@8H9Cdhm<*8(hdx7_d zdo-f$udkPaJQi6-J=ecICGMhdrfDK*Q=C)cqp9WcdGz*b8nl%R~xvC~~XTG`a!Q0j?+ z8<*k1W^&n0G{_i9TT<3QgZ8ZjfhQU~&{jzu!CRo3&Z;%f?OHiM(#mRdo4NsJ_F&dx z4R9xY>sL>ZQ0Kdq)a0oDsn@%;DD?8U1$h-XUhKj8=0NqRI^mUF6Qrj5^*m;eJ@NrO zs0xlaaD14|F{WT|^Ft;B_FV>>#niK+>z5IinebEg$nju}qSxhdm>yDm5t-a3Pp$dR zVs4Z@B)g-qOl^z06*X(E?xG*X?lo8{9m5??O$lN8AI0~SYY8>$`)6dj% z6w?$vyv*G>$oCA-Q|f$>Ipa8P9a>{M=2%k_7&pvW>BJZc$pH2uNe^q1m^~g8UA7yD zH9S^Vy;?5z#*Z(h03SQc4gIA@bPp=<@&S|8(LF3<(%+m9R0-T95mvK-N>n>|T;yaJ zDbn!a$xNL2)*uw#;;?TwCj0eZlBFUKZL54{$>&$L!2vJkR@k(ezd?bY{3bf2i+q z6-{P-nSrlok5{_L?u}cbiCtfuAm9{AJUS4Cp9mAjIOs{)6549!~=+ zHTtxNE|i$R@~4aRTt!QcNg%o6dU}v>G3gaiSe#O%UI6$&Ie>C2*2HLELoKS~-TC#3bh*LyDK!vT|6SgKY+W)Ffe`K9nL z9MybEBL`C8v@0r%V%uwEF7qxLM>|+7NA1jk62y#DB-c!4Ct(ikth@LPAQ-{mT>U_$N5;74nh zJ>0+L-`#+}X z;G$w{P$`FR05#xEP&a>GxRSw5UF3utCk=R*9f-wpOQ$3z=Dz`uS@Edcpq_nrNE@&%VQsMR1m@r#{k=akYOg&2KRU=gk z^KR&B3ofHiVy8c272E-6d&}iFPJI_{0@q{+P5#|=m`1oz`wNheIWa;{^FfFcMteFK z1r!#F>|X>NHUs|u#YCBy|1Tz*^7Rhz2J{i9_5s!iy?)cSqlh; zD-!8IwmAuf_9(wt2Tj}jkO*MDqlkm7bu2~jc$eHM^V_{LV@WdB^QR2dp)8VNuw%uC zin7|jw|8`HGYg5_#!1#>q5g@WSu1tSXW45|o8}GZ<53Y26#x*XJ1!ZO>b?#Yw9QC# zggMg^IO#T;_#H<|3{>`wEJG8N7PBr5#txv!Fj-w#?6Sb(+z`c;FvY1-pbW8a_U7c= zE_HklGGf4{9+RqEOkv!~qv#J2Oa7ZWg5QiAJ*A2^aDyEsQk&Rvb$QF?C**J-e540k zIuV2sK3!dXz0o3|B6fB7bZr3i$xtQu41QYyK1(J~d|;$R*DCP!H~{f#1sfppQHywp zduFKWGDA}MmV{OF0Cy8XO@2$_1+-fiXB9^J17L2=e77!2yZN#eAH~(h`wS7CFgejh zZK%^9^(;bl6%E7+>`}F+1Coo|i>*yFPipNjFf}(XA3d9$Tq0R(v>~FT86q<)U*#(aTsCQ3| z+R6W_?Fj4rw@7rW#7RexHpRwi$25FUN*EvwH->mc@_$1%9iBGfpz*W}eF08Eg>pj? 
zazQcUFhsUwH>-1T8vQRzL>9+eOJG#_t6PIWzzi6yMYSS!iQGWylHO_f%XroZZH3v) z?ZZk)N^cwH^8dD6fr7yUfV_4%dgeQ~V#d&X<-sU(r2Nob&`~n@?s2&_#Dzge5=$KeiAfAIy@Nm5){vuF=bmM%le zapMO?|Kkfnl5Xk8f8*j#G%$29=5N9)=*M?dQ72}t8RZ-9c3ps>+0dC_6&qp}hxW5k zZ0=!!iw;nwF-_fMDiXg|*wf8I>XMh-jLLdMc$`3K#fgh>bUPJFze#tz`)1C{t6!b% zbC;e5VN9wK&Y+$Fv6n1Kj-f5pU!+xukMT8ScUD2hLEbSU(R_vfaaMhf|73TX*A-j> zWYc=m+zp%#e|T-}@K+eIoxAwilUqvgzM?nxiSG^$xpY{N1BENQjDO`OwxUY#9{*wX z{<-!IIT(Xeq7Eq`RkmWVm}_3@@YGmtb$Y@zjc)~bSf@2DUDm?yg`8@Pi~f6V@wvzW z@~d0xMo=sQFnAOH80_Wz;>8-&nd0{co@>y;?iMt%CO1&UIe9gq>Gp2!TTul$TMa|2 zHau7juei)5m+JSKs@uNn&h~TE2!SoEE!bNN_qy6;5GYMW({`J58V`7t|C$(WfnmlI z?BB7t6yYF)k{AkW{`*jHTEj@aYA?n`5nTR!ZdioR@T^maABi$On9qVu3~{{;3=ZYR zBGt`Gqf1X?B5_q5IHQ?hX%tFZDASV4Fh2;Xrdrl(s;TKJ7VT^U7hk{6wENaire|WY zpFgDT5{Ib|WEE;M&L-fvmMOcLNq+mJCBw(V_P-Z21&JZab4Akj_k#JB+^Dp$$YkZP zsM7h_5CC(wu_5wQXnCwxRM{m!cSo70@T037)(&C+k`Lecz87+>Zd+>dk2 zP~R{5C7k||s86ZJ=%qE!yP#euq@tK8-Ql`9D5FBjL2*0H$jR0ts09R(zBj=ZIu0xh zGWU`493FHGExRzYb8k)HVy-84keCVf;~o($H?*GSU|Wgk$eSrADXx42Ha8a@UJ#k; zuV%)ZuX@G@56ie`5glFvjHwB-W;b&^oR76-%D=dq~wPewgy1X?0sXwWf%-s>hF4^4&< z0DNzQkt4dbUbpocRYIqQ)W(fvzVcXXi9%t0^u4BrL`CM4f9tHK;&G zg&;uiaKv_nJWG;Bu;MNwRU!%c{>uvBs;mnB^PPoco7&b#7~btiF&#SCHX^zI5FvGo zMVDP3xeiAXgVP)k?M6o}#6FKI?*&d_BdOX|P&y5?UHlMb^6BqR;&rm=T>Z9kN4f72 z)f@Vb^1-&gU=kd^Zf}=^$MQ&i2E6~`8$I>}rINa@CoSxH^o6~@(6{~$EKlgi5c>g# zy{DMwW(kT*9b=PCt-cK=siyyy3ZG`qZ&p%wXsO#co84XD8)O~#9%u!PFZnS`acHc0 zR0QbcvNz6Jtf>WC;W`b4dbkX&Ry9N047jkja8fWlxG8d1@ni&Q@h1f_^f&9%G~rYT zg$VFjY$-C#+LIZpmVuhBFyw(JC}k2>%Tn{OgiDxIT&nXM>kZ>W4=e6N34P}*tP|{L z|HS|mi#Z-xoQcY9we=$z?C-ZP7vvdvLDuzcVdmaMd(K!}i#pal2?GtcHIYfAiGAkmHv4^JZl>EzUg4SQNmA zkx_mAk2}^+h_dgWmC?B$;a7sm=z4JVRN+dk9 zV|}ObmrV;}Em}q{^rH+BC)!wp`uP;}(#f1RQAVW*L#@|+lo-o^(dSt^RDe$u_Rq2W z?z-cfIBS~V#ExOe8s}epw(OANV+p;OM%m79)B8hyXNsVX2=Ko&Vo`*2Cw$ILLC>Je znExOGvp)W*B5dvaoj}~;?Y>KZVy2+8eAgYi_goyb*UjaQSb<1RoOI*rAMmR~fsnQ7vC@%k{|CSIA!yHAzvZ^um--9F#AZ)c=)51dqieTz zKpwtSaB=OPRv<~$lW#+Q`C6@)+XYV~y}~^T@cC|}2VB`ARH%@H98zRKd3R{zzzAU7 zkojN$7t$>oAf)o?aIkImll*Wq&?y5_`pbP|Um-dt70j?@+NIl(`=h1~E?*3i>2Yyy z`tz^x0^B?03eq~GigLWu;j1GTJXzVAj02DTOW&vmCl?R4}oe! 
z;0?}uj=!iogxg+iyYH3Np~#%jW!W1s!`QBwuIt${aZ`ufX)339my6uKv<*j*C;vM~ zf7iYFIeK6;`&qg%*FlGDR^j=^`>jOw`!kvkGUzO)75h8N_)5`Wu}eU8fo&)e&L8mL zx>a<2#;gBf=^WVWfYL1-+qP{rZfx7OZJpS*ZQHhOyRp^qcJ9od*x$34-kW8`Rf~1P zB!9SuDIjOQZXa07fw?XtDT*jvKoS+K4|Z73cg)_$fycU<~l4?#ON14C!Q#I%l+2iP2=5kg)BhP4tZ9OXz^Gu%94s73itV&%H}Ig+z+`FM2%!HAicZqdU@|e{ zKZZ>Dal73^10Z^O5LnFQLEfaD2mHjhQuF>b&h2NK&+O5& zrB#~#Ogc~Hy>6CPfW0Ncy2hHgNuF6LxigKRS>HxV3y3nQLMguNS0PzX)#Oq&&4Ho& zYVI4$)HO+5l$9!m@XH@ft#^XsNifr4i7zAu zX@oV~b|IMOByPm-H-&Xfb2rvAQ6>cLK^QlSX%xP`mMz~%JWm@=h`2n=d$~_HqPhw> zqi=?YzWgI2yz)0VV+{N5jEzyb;|8%E^<}V(q%F^ufB0GeX*0fe&B{mec~NqZk@=&N z3I)<=wp!QT7Cw5mOtw&&v3ni$BJ(Vle9fVmLPhr^o>9<{jPlqNrAr3942e%(=vy^% z%7N~s)GdX!$`?n)8U^92r@8QgFv!R;V2xgkQdOLe@snBW@6iJu$VQx84Z7QIyBk_Z z(P&RA#7Oginq;C~i9@MX1oZ`|2!-XmoxS0XCZ%1O@%0*8_KcG;=#-<=&}TuUNhHIV z&MA(!osH{wT~1>2##B~Mf5D)hzKgacsiA<(c!kJb9AiWD{F2k2?ZFKzHKtS#>Ji5*X!9E)^ z)X-yRTSA?H9gpFlltYQo6-aksE!55|ME5Mp$Z(y<5Xjf5>R^kx1n-85c!#$Vq7H02 zIhz9@T#fK+Y!YV$woV2v2h9pGsb=U4hsz*3go*hhggAMcb0OVHHwG0>6|0ssJ1_cA zy~DEyQ;T6dEN)DxdG|X!;zp=t&i;5`(CDda=E?n`rSBJKVzAqe*;k!Lj67qH0epTU zisXou@{nCyQjI>cfNdt%mnok#ES5n2yNMSdns`)`4zDjZt+Ng%!{#56l15^fzl^0- zpMFOK_&Gv&;Ob4Z)b-6EVwkkt&!@!($iJLEG_^iI36aIAX5t< zGhAfMI9>d1{{Sw@@r=p_F_1ciqbjQ~CtHUd>~X>_^n6&pk`mX2Vq1)Pq)nFj()fp; z+AQXVR$)3PndyA#SKin4$cj&|*M|5SDIjcV;>nG0^4-&{H{G;Tz)v>yFL&482up!l zOVTRD)N%HO-4K`MsA@1HMI1k$iMR_ z4d9q&HS9OJaLC1mA*(;@G^Siod2%R(KCpg@O^CWSQ(c%#JPc^qMh;9B0JqpbnVUP+L;2`z;M#REwogWV^fI+#xr> zq_ch5H>$C_m{pUwG`vPU#%UD8y@IW9M@*QK+-~z)Rg|ZxkG8ACMTlUQJCa-w&`&%d zN8q^pH*Z`*nBaYxw5kl#(Jq9HhnPr*m|DCYgc)N5Ue^1Zn9#%2MSuqk`1;r!tY|h= zEGE6tH~*x`D#Jo4*dR!9gfyorX(>|k2=&{1gZ?!qMEKv4VM}Ay20;V)#qysCahkMm zG|+7TOnVI7AQx%Ef&tUid&y9S)=a#FIKG&U=Y{O~>g5WK$z8v$rM%^L0!u~-K1BFI zNOfb+P#kW1E5&|bg2Ge<{3Z@{V8!VOMxq5NPln!Jj_#K#L2ZBate&$L6HP><$m zh50dd>!Z$UAP&j8;#EGiUsM)NpZ7414l1c8rJ&{v4M{})!TKcQ)U~l%C|s1vtE`?s z@KG|S$^Zq7gQWx&n8hFzY|4jFLO4}eE#;tu*cnne5gZAdu_4rFa0xmH{nYdCQtebW z_J&GE@clCsYQJ!xY)2I8U+4x#AEdiTt~!;s$2DicZKq@^2g$EXTy4J+f6 z?TF>nx>7<*hWtM`J$zod(;@H{6W%z0U95OY1)Uuw%6srFf88l?ef%H&LPACaMB|h}&hnx(JqXP)=Y5MT85&r-enxk0Htm-POdS2PVxT zV#!9LVu27RxM?-{{967&C!o9};u%DlC#lpqGJGi6!>)b|8z zAvW^a6yry0EmX|a|BOHc3OSlyZp6!9jssmiE$`s-DT&Sf4%wkzKQMC>f)8g|$}G={f0F@Y>$IX1L$&Oeo^HjDWqy#APuU zjQ(3y4bI9{ee0HJ;&J~x$K$-P7;d0J%voCEo#-9OBJ~pWlc}?CU&~kn z$aYNuh-X)8!?{2Ra9t6PBT zEKHbv+B&k3d^Hi5au23jyS<4dlGNd~byD;_b~;8K#KG%)r?B zA8V8hXbiTQAJF?TnWF!_o4?Y4aX~SFIsa=(YyL|EbE5o91D`48HX7(a1!l{ZPR>$L zcO}|fk3O=bNiC4zJO9U%j=cW*2VM#yq84m)chi#r0_ns(yIF!{%Ja>+-}pei=}qIH z{BxU}4n3e$b8>ZxkeN-+ojJm7#KO{)<@0i>(-b^X*0-mV*B7;Pe1c8P6HBWs4}cHsqq8WOx}_44;P z&tzLAhY8}TqQa4|sg|1X6F5p4AbJ+eeWhFrPl`i0$k>%kgbp+?zkI&Y`x6m_M+ zh$TUJr%q^AB4fYvRn(nk=F`%-(_bDS*yMifWVYz{B0o3zkzghhL~g3&g?+<>F?r@G zmgkmH(-^v``}@n%yNdvC%pJrTM=(D@SVFAAB!Oj24z<*JKnAblaXW}^&X{rp$}Syan6 zZS38ATBDeNOmRcBZL8FJ>wFVdE2GC1Q(?!alj@EjlSKtGb1$4{vxE{B4}aO-?&s*Q zKJKp2IFDy$>%K3Mqq_4X`n0WIzZ?%eUN%U-NBTjgFwp9X@hLYP!@pIeUXRAvASq1x zH_l09XABfIZCiIdd#?9|NiP=wPhFh_V#>{1s%xQ7jF*+J((Ww>2y-?`-XYJWw3js6 z&y$<>Je3q(r3JPvwo%*IKH_`w{D8MR5NC!d8tdHe25fvLDAoHjs8Wm=pMBZ)lNNm6 z4tX)L?u%UZ5a-9E4LRN?gTU+A5qUz+qlSi6Oq=)0N!UMugtY?Z+0iZl_GjVS_ih~w z<-F0kiK)B^_YH+2uhp4BfSWDQ}}?y%qN73>Nql;Cqrzyj^@%+dlP@|HTO2%OI&Ar1%8LYvQV?Aca6lu^40bY_`9@a!Gc)SumY-Oo+DV(TG_g%x*KLOgOF;mHMl>bJ zzG6R98*9RJ)L0CDxt4!yys6|gno7rRb3&Frf9Tb+!n9IdZS^>yH4d{ZB7M)Nz1@V( z!TGk@Z47UojttL~#!FGqmO3h>OpqM>uh2iMfWnE-!BMT2qsmFst%}m6U^qrNm;}qX zjx20-dFqVSA^-Q13|x1|nm2MZI~4mlerQAl$<_nEs6_rE+_)Tn7fld~F-1(Cvo)8= zJm+0Ho8Bs+pk^X~njmISl3ElyN==ANwfFf-6KY1p<%zDJG@o&2A`-<}ULq}g6?Gnz 
z5G)yM0HsJ+5o-}M;D?moJ4*99cu3@;@A$LP2Vu9TvvVeI@b=*Z+BIr z&jRPu9NpN|I&Lp5+ux$Fxq~Uw;aYEhskIZwfdmjeVja%ra-4 z^@>B#RYDLz`~ZX;#$Xy1J_cEjbF;EJUStrHR!H1KV8dO|4_vsUJDu#8C9(~ogiJOJ zL_NDq(X@g!vIQDwuj;-$&JN_2uBpaF017EIUyw&c@X3PcCvKArL~D7q^Bio-o_&d9 zz@xY{B>EXeMn>du@r?wMDfrpI$|UTWkqvx9)7TB*CL&tPwb%?aW)FfZLwMDarS5?Ga_*_b*Q_ zbq4{^v}v|o!Tl`I+SLAr_mkWgSo~SVnE9)`45BAIDv9;q1o_EUdZahJ$+m(+7(@=) z23TQ$Ms}UaJJ|_)xAs(D%cz^{N)1d^OgJH*V?{4Ys0OQM@I${y|BWJY7ZQOfv?}QO zBuW?1iBwK3Y(w^^@>!vxtLbX@BaYkS5a!`>8Up z@fh*DcD&Q}C|QdEb>i>8O1(PUgV_FnX#bthLjOVZ=|D=G=JANVj)=wyzAVP{-4hf@ z9=KqjFtl%}iNks!o_YNuP%z}7j@UO_@0zKc;kR(SG8Ba7mOKTiby#+5=|wx^;(Q_C zP^IhLIL5)zFs32TwV^hP(yA@XMpNOjSMML`#2_N;;)dMCMWo}N=jZw{2aOlpK!6+a-jJ=n==u}8DUAxbP?H> z7s{AqJnh9{`h6q(H&4nzm)jv*A)^Q2btHGM`-}QA^{mv0k3PYJhXmT(aI6pk% zC>`Fh3Uy9%;6VD1J$T8~uvh0%rQ4H3H%GA$8rOUiB?#+7Cr$au-6Vr&{p;vp!gx>5 zwG^KaY8}(E5%MfkLZitW=CCt%=tK>IIhIueIC$^x?ahWsO@$8N$X<;rQ41Ns{;!Ot z5NU%Zuf{*2zIn9sv*&921o9Z8^-$|)v7}7l!&E`ZXF=oZL@3>TZf`_^Xn3<)R~r%1 zgN|h6+cdHd{J+sSL1~m$`T(1T(sI?hDX)B_wVxGkDw>C1fX+XKmUP3QEA|DO_S#lm zIN&t}ELJ{_A&Y|ABjD`y^yNJO`#Zb(j=oQa`wc-t1kT}B=)}lC&BZHBmiqv^BUf#|R`kk2^6#w-%7k+_afN(MY zmtM7_A#J}UhSd9^en*AA>a{0bDha$L3ySNQNnwW|8|a@(6%`}WL1sGg_c>?hK)ac) zi$>w8u*5CwaW~_yM2~fxR~>>yL#_@1-WN&w7#XsX^R3ebvsgr3aajtP4$iI9p)Vd4 z3LL+FF5y>U5Jk=Wa6hVOq563QIk$(W=N(`TI7s&b*nCK$Kf{=^M9> ze6akmM>H7@9QXK*o7I31I@zf%4(k)S_em@pwG`Gft;vG(PzWAFkxzTD2wR4wixdEv zM1@;+kN$ZNxd8Th2v+f^nSG#)5%WllJ3MtrCIcIK;1hehjgS(2%{d)V?qSU}($tN{ zuUiVctOYJx0}d8~qpRin9zo;f=79j~?YM1T3fF&&{#8JJFE*$i42kFCZ|H1(smV(a z*u!f53!e_=@<={;V*J8#ooKizv+rnH}Sd~`z7 zPCE{-Gw2UiA>bvV*`*DlT2 zTQEys2HLG&udlDqd!u~z0T{0m3ZwnJZtBOTlvb%et3zh0)g42o7C+a(6v@jbWoT-H z`uLHr%9~C-kk2d?pJ3yWaQKu*M|+02r^-)|MZjazgX*2x?GC$0{SM$!6zNF)RFmW7 z{#bXRsgWxWdSh}L>4{7y{f6U zX2i;d(@k<_$fk@3G*uC3RsHQI6Bx;Yb}PN31j7E&1!RKiJVrqN)Aw;G4$tEL4Xn=K zy+=#GxO9DOIf8oZYU*#RG48Bfs@24HU%3K~`NVr}O)n53U)sSbef`(^bkPX~8l z9JCi?a6F%Rur_bu1t|dpAMUJ!uOrYmq=5Za`n!-MRN)F(I)$h)H%6^1pKPLBxXw2p z*`J{gUY=N4khkQHvDr_pryZ|(eAMbN-Rf<)dT9n!(>WXZY&`7 zG91Ek4~E1yE$Yb|>ZJ;vRuh4bR8Tp-yayKjK|w@K zuOOj-xaFoi00ys2;hsMLS&gp=WG=t$4lI3}8AT63v-36+zj>EJD~Ci1PsCp~7N^To z%~yHDyO{K8k#OLLL0-`=7l6&v$|Q%90FpZ(47ZAWBEO1EbPVQM-AJX%3*ovsCP!5- z^j4M&ECt@m9ALUhe}5T~oZy*_tV5GR1_o7u9c)$-bcop;*fYsE63Y#h30;sAOG?X625LuT7cN!|3Pwa=-(3h>kW*T ziU}HqcGmXku>_98#0u%d7|5lfr-h{my^A_u&d+xZ!Gy&oK^z2GYCNJNyoGMy@L`xd z@dXkcT1xK^+zBn<&jSiK6;|<FcL>0Q+gt32(^8UrRv1 zk8P~OCNOl~7t5Tv$dDVzEcZi-brgqV+zjH_K{{!{edwW{yx{VE|AS`A=E`?FNktj< zp>YAZ^6iY-LWt4?@KGss^`*3`cG(6^UUth@0WF=CG48t)at<~VX{M`G?$^2%!h8qE z1)%o^{7>Cr|Bot+n%3?JLYL-82Z{)Q_J1?*?#NhAGs%|pvOU@zI$^=qD*OkRDoIqq zd#&r!rF+AcXk^T_%o7NlH)_c1)1`{#6LWlepn5slI9WJ{O%cm(QlUoH)f3mzNOi!r zOkqk$KXXFv$?B<|e{R37^4IXPCw9C zKfJ$*lv=eiILeq|_pEMJ8S`m#b&5C)Mjp$IG?KrucT+cQx_IkBo|8pWcIrA4kQ8Bu zLG~R&J9cOd0bkiD>g=JPh-cE`;MhluY>mSQ1yTWAfA5pDB$O{4;zH|6Vc<`e$BEF}`Sg^O9GzWnQau?yHHcweW&Cc@t+ zEUx4CQgLjR>gvx`87);;Az@OIu**0wV~~7P3?pKdAJX${cKG85yLCZGPzIJ19(~KI z0T2>p(qnBItu&mTJ|E6rNw+CuJ#NwQj=_1R)x~dj#}+{NdLxY}lyLz&oh4Ng7s1#O zcJJ~?b@v!T;5+-H30h(_3E4>(K}~QH1S26 zHQ7}LsK?rBLoI%c+es{nls`F13gm>fPdWs5dP#1Zo;Zbr;L^i>e`F2wW`lolt2jj{ z4J+FYVeM(|$4Li>yUqa5#^IrX;p?_fG*`5igqHs32sp`7GY@sLDKl9}itsEF(*vNB zA9xDi*pMQ(K?=KJ+)Of48M*IGZv>Kf9{IG9&@w@UrmD@vS7I8EcH`)SL<}Bwr*n?H( zZ6Rxsp;*LK-GIPt{6kZfb%O?cXOZu42Ai3tKgZ{~H~4MCFN7c}%WBub~vCXSS=x|DHI^ z?eU$!eDEN7{&Hqq_g}%Y0aArbw-GC1+;j{yf{`=zgT^N`CzHLhAQyN;{jJum-@F`T zQDFo}^XRkGOlKNn-e?J$6qqLuveY~WmimVq8EV+Kds@BOOxJWpH)|Cs@>T(TEp&tL zZdV*oZE^&2^bF?NM#YEPm%~Fa)$sAmtOc;39aCYpMGP9t= zhkD}MngnENiugC;LjG^bc=$Cwmu#jXONV~lNR)|o7rT=5_I)gv9AF08%_AN|Sd%N^ 
zc8(ybymiV&^rb$(qpqIwm|@6npkv6r0nO<7h0si9$Wv*GDnjf z%4F190YfJdl7P^DfQCZ|!=L^Rz`Bu8Rx zmDQztfKJaEhwf;8s9P#l+FKbiJa`JRK%pgrPD#8U1;0=KyBbLFs!L2&BK!&sn|$vFl^Uy% z^prz78SU?ij;_dL#^z5UcY1l^cEV43i0yO3EaslD6nj);Wab||O^^3$S6gwwDT4*J zR5UDS5g#=q96w-0;JiJ}0wk4W`)ekhDl7pq0)cY`kskOMo@S6Ht`_kQA|wjH&lES} zw>!+?<14K|4g%x)jD@oOD^b%i9Fcm^cuxV@LRCY}VI{0L>H+6U1o~uG>!X2MCOdaA z?6J?d`wMb3Es8R4yv^~O8eYY{U0lbBPvg~O>zA;RgWE+=Y#{ne${mC0Q$rv26Py6A zd;!c2r&F3h8Iq`O612Hm(8(GgJ?1VTQ4oKVu=2U{eD2GB{P4Q&u3moIc-vOOkzf0T zmO{Hj`Db5VR(Z=w+c?q%PJd|~Lk*vYdwdqVzw6>gKP$jrcaSjLV0u9S5?>m?>)Nz; zR=iP?^qXa4@MSYH0^;l*wuVRMu2AP#PZZFVj#9naMt>s8Ja-wkKpO?1?s4ksoc#TI zc{#Rzglc1W+)uS>aY22me_OBtcyUj)o81wQ{neFD2fdlD?%jc?6k9gfRQ$W zenX55$NiBpJ0|FB&mHj@yJim~>9H5?@xtr_yi+&RALH-^KgGI)23?>9D(hjJ2vvXq z%itKrvbrnQ`ToRAy`a6F{h$&_x;uxt9Mn(+Z41-Wu>tad=#xZ^)_o#<4GB97AUXfW zQG(TM zbZHgU)6yqm;QIM?nu}>kV)&_y$1Fu_)%eN3auhC@iWFg@;;areO$<1jk&K3`Mv9wH zh2N)3P>|G+9W08NN1a_i$@Wh2-8iz!zV7Db;PI-reniZRI75Tr-$p$E?ml0O|1qu} ziz^$awgD{jLgb1K7QGvLS&tZyxw~@qThq~k{EVPSV8^=mFDoON04Ro^Kip#3lveL!wVFC$?VMwECx#!Uqq+i)hC>W5E>Rq;1f1&Y}8w!WyUvTutLt=tPj(yyqRgE zeh>76iu;3Z`u4eLy3Z~MW?k{XPF5r)9Am^j*!C)^-!iuhY!$5c%G8P!vla8Vwn3*T zFl0xv2gfi$(Bh(ZvZ9^Oyjo3S~@7Uo0`V3cP1Eib^Q{EBoKUOcQ zA_Xb`;m!G%`z=dJO4tnFddvI6GcCqg`QCop+N`Q!b5T8>v=VEaPt*Rj-f=Ct zU0K_8i|(25`s|VTpjgbXRZ3(B4HSjhMfo$1q~4BGs{{`f4tFDh|w!D z;kun2caA8ujlmJ|sMAMxf}ACjSit7vaeTA&uWpCXb&xdJ7F^$JzyD`W46PLL>>5O7 zImrV*9Vh&i<~c4sV;z(L4iajC7^k2z+jAX|k^CG&m2C0ZZ`-K#H^zF-gLgk-$h?RF z3uYn_F|IIxs&$Q&m+1VNcW2p{8%WG*^j4qwdp#f0BfO~+p&@?Ka8=KelXpayPPA+~ z0ZnRK-z=RNruxG7<0#Ox57ump_RjD#v|?a7OrHrDOSApoFDp zeNAI|lkQOw%wy0ep5rDS&Bn5wuxc6GjAO~!^VPLvt5#N)Rb0I|#yDy6cyM=8R~At6 zK*yjWAwko}kW?w2U{?w5EjH^Fgi~w?s!deLj1-sleZuW{%=k8}>n&KB2QL7%(_y6_ zmt!M&YvWP>%P}i&c!p?M*<%2c;Xn{z4ya}1{UbUy$pWA2On29?D4DcbWb_-{XB(sv z`AAkK`f4V~yFcD=OF6xB^D>JPxVnXp82xdLsH^ji%^hx2i7-!XyX0kcTnI|UJe5ha z6%}j?4L6%U?xg(LtiKa~tp;32a$`t7Ib>n4Fxj3BNwyeWcR_sQ0-?h}c=ej?X&mUX z6q+3pH|pGy`vM40Fa0^_;cYjk;N!Joq$|N}CQn3|3jW8v=8X9_Pu3?aiIJb$|NiJ4 z|6yLCrJ3-8ngbAKzk)2DmwcWd@A$FJ_OE$T^=5t`BR_L8>?dyP2qRjzeUM3pTC~m$z)0 zLsM#5upf!Oq_9-k+D~`~`KyR{z zwsPNki@uWbCo1(jfiX1ZFF}P76$|XLH!AaUmYsbpn`Q5oFJlso&y6om(8Bw(e#wtr zuxRS`GJr`Wl)^Bn6u47u4`Y#5*vAXUiINCRvkW!BW=RBy8a<}wVg`$M;iNc2m@z9hH78$D z~L!ws=W-0rqav?u2K5KTSnSuk3=URc|Y%ece+4LJW?>v zPXXU!S+R14$_3niZlV?xy-CN?{`fsBu)2tNsO{{TFgp0Sb29;Q1h(5@dX+vT-D^%f z`vVvwJ>9~V06IY8rrdZOms^{wk3Ol-G377U>c>N0{}6U@blCbgZg$JSXPX99J8AZon-w$-4p>_u zP%**|LB>=df?JvTE3Y(pio!)kR$NaZzJ-@kiw_!7$OAca=dRZmh}5O^l;bDZ8UQT4 zgFEQKC~C*D7eX*xr{uj;H;bAr-D49hKTCN)=B8?@`H0!YZ<;kkKh!jasC=_g0-Dh| zf(n9E#x3C^yTUSxfI%_?*44E<^_Qt^FtdX@13V`tGKrlw2d511K=PWb#&b)ZA1RTx$DudENBfOjc zp9`~$IVbbT1YsHMHZo*MefYfGB}EPng>%(0?B?TL?9u7M2pi$T*u{g`ex1uvmK{<7 zSS0;2a)fS%{Dbf*my`pg3;=XdGv3i0inPks9scpw4QCx?=vWI-)HQ@g7+`|8YS9Oq z%qbu=@x)Yuf1XU*!rCiR=N|+P7T(=xh4A*~aQ^!?emF>GsEkj^1OGdJ znxw1BYE=(O>5mkGAZz`5$dsT~hfU0VT4(XK8j*BNIJ^gyrS=ntyKHZ8af`ka_-0u* zD#&(u=>||Ox128u*h||kBOrC^oc9C1m_f&Wf3<)E%h{3vy}&=|&Y=KBCmKf37f~*Y z=43ACl);myWTuV5ZINckvfP0I76u!z=UcHO9j>nr{IxB8`|FnEj_>NOLBqb03pXoQ z$aW;w(E8qcLUEo#+o8GFz!>H2YS5^fJ6)a|+fG6uRRhT-L_UAyoqvT7twF5*E8ISX zWQ2gYa5FfS{8GiM(5pv93mWhSl~*Ki39r9SMjAfPHz%}#upW>=N|3+f1et(G(!Pa2 zu>jx4`}qvOJe~pycJuR0(h0@aE(GHcc>i$5_fBax^bvhp4R8T}+F=z|ngc}n5WK>F z#R%Os8me$#Z>@RN$rpJ$l9ZouqOnzBHONWbl^Adh2gcEWdaVRQo#lDG@0Osh40-{^ z_Tw|-MD1a0-3ms)GEtaM!;F3M6XZ7m#(-9GJxpL)UId4MDxkLJ$;!(X`}QCpOC(xC zc&O9w(Ly)MZ32EbpGsN{dQ96ybP7nsmi&avZC34;4kjsF{^)V%_Ujq0}G zI*n=55n5RFHTLS$Upn0mI6|O@0TtHc!ht30Ndl<;6H_)$ruhHQ@4r!+gJV#15LWgy zMQ;#lK#KZ5^O_T>_f`FkiqR+)F(KK11jMopUaPr-&3lspM9s(uxY0zy3HRqC=Ro0s 
z)M-j^Ab4V&^ku=1Iw|LOX8d+(Wz z3z1^Th{te&Im|tq%`gg_pu??G@l`RSZZhf9t=&I@g-jF_lQcJ7`D;}E+FXUx&D{)1 zDll$Dj&9x|i%FZ%$o6a4S0D*QJuClvo9uch`>?`bb5gOQ4fE-g7r*XP<0BMH41Sgi zU^Nk)J?%2k$EuTJCMFZORc`es6p7q`5D#px6yM^Yb*uBi6fM?npSuDgI*D!KsakO9 z{H-un2`e>Aa^kCg@12WWT}s=miS1_Wt2%SKbCpS7dqiOZ{9PoR!X2}lOStk zhR!F&$G-f0<|nnN)5lzyyt;@t)GlZyfb!Ng(> z!lqkqTy`ogxv;`13%Th9Qt-w$tIQd-BvynB`LJBJRCgDW`2-6W*M&UXqyAJ1#<@?3 z)c{M?*yVmX6g0}T_^V_^B8F1shvIYYI50u{ZkH@sho2m}A>GkmlWW=6)9)rwfH?oo ztDlGZM~r>w23j(=xm1<1cc0UYHzJ6tT@oV@ilF>l#xlhk@UIt_H{S~7 z@I3}TIvt-UTQHby8{IwQj02oYi4zTR3w|X6x0wXCwc)50q&fKnNYK%u3vLhz)isA- z=z>-l#e=0!py>xwwA&ZQ-Y4OHfPz5ild}2Uy|+i1T#9jC<2AB~kwF;S9 z`#v9{N$r4GX;w!IYza3P`?U`{)%QA`V)*uO8Y0x`hfna&p3D%a+4gtIp?!lw-dg<< zU|O#|rMHfvwUzNJwA64n0BSOLZq$C$ zPX5yG5=MT?RXa`fQ9}N3g#gOcMmLZ)H7D%>O3-_m7Lhm+`{m*eTDQH(wO?NamBV^% zX^@}mBI7M{5_M0}bMS&L)aANa7s4|E-_f5ZOQEKzpqUHxT;5S8`yhRijqH?_2-dOn zFe7*J!TI++4m{(RK-a_@z~ML)B&fC2w%#PSi4$K~V*RrpNMU$*Dj(jnstH<*cwaQS zEpXH{hOedFvO+w2Jv0c(6yYjRd|zyXJy#96ZZL`P?Z}a8Aj!Nn$N6UBW00k0F7gGm zG!yv|ox&;gGg4ec$~0oOuyIX?nyl8-dkw4C@3z}?YgEw#7uvCpwB z-vV;k7*hbw0zRabYWA0!J#0fDqKHJvXqpuFL1H*k!0YK(J#V`I)l>1LlFN@OC=*;W%BiaOqi`|row{2_5L!JMW11a$+rU28#W{E+dITY7Q+)Ki@s zu>srYj}C2#fk(th#$}T9?Ty|S;&k7H=>PsZ{{%CRe?=9h|6g$gXlgtEXAR-CZl5$K z0%=>&g*!*`w;YZk+~SK&fi zxY#YD1s0&U%i~e5K3tNd_;g`Mco>U3A~8WN(ZRB?OZACdEqGxH+*oHseZ(v+_3z8p z$*ntw{jMd?TRJ5`gbAW~$NY9G&v9+!d1pi)lT(@$!;wBbcIG$LDX37_CNk z1QjxSMDF&gEEpaWwEX4S<47NG)6io(y9lMxxQ&+T-?kcadk+rDh+@eh-A-NEe>vqr zm9F!z$6@qfwP58hKu6$wssvxRU`yXE!vakeHe@x^D3mC`)lmWW&zMF5@}FT4j@&fn z4Px0^fl4JL@@eKw66Rrlu(NQgpafaeYpjuwP@wwK^vFp%FU|pNCV8Tw4$fI^p=313 zZhVxKRTK#Q?^_{W2RgB8r2!5BAP3PIdMFZ$_s6o!D#C7bnyo#0E=iL)s6GUn!x(Yfsk>B%PpuBa6eUj@edKJ)18V-QauX$IF!)r+}?g69ao>&TjCUbqioS z*_x;2+x;f!&<>K#$VVMcTqhyW{tx+T0#u&1XL{kg`#H42Rdz*#;Id|a+I*N1TkYS zlhrnbs}06`;V%6-@RhPK_nzAQwP+`CDWXlzFaiVMWRau#O~}TVF-;}rAD8|TDX&~D z=z*RyEdZ7DPA=7Ur7ydSlsxH;De`Z344%#P=DEx7Ub;N<$}{8I>gT4I(F7^-qrD$7 zcb6`zCGYJ+iTdsgFA$Gz8NCu6tyn{`Ks%FcY2SipfcV8vC- zoHido*%8gpD^Wd)OszikXmY8c7lZYj(mwF@%dKpB9eQ^L6} zR6N9O4c3v+QXv}uqlg%;HOmo0X)H888;$~mlT*uB(ulU3TOFMt@ZCWTUUxer6#3;6 zbfF+Tos*O1nRfj?IV-pYE!;;h*>52YkdXWdI3u&5GGg3}(Ma_E9{7U`BjGa5 zbWH`%i6STzHTp(< z%bd~!bm>RpJFU_ti{lP;9`@hv=A#aQzEKU!3frH~SK|DjnCJW2Kfy@BhQG{kPYFIf z(CcNw+a*yQQ867+LlI0$1Z5jin6Qf=ix}d(m_ub~*Rgw- z1ck|>-#-ZkNuU07>rYo;86-(8OQHiP-_~IR`CbHP6OMuppXoG$^6w@p(DLx60~@xQ z`}El&bD|{zq3j@uDDvV~!eOGXN^Vc#DB}%il`BF~Xs@AYzL>ouLs7yJdBbOm-J!sb zDuoolRWU#>)8ss2q4+@n`Z!n@NE|XL^l<<1v2o(VW1IyfT#>2*Cb|g+OSk}$-|nj@ z0{#H|*+7f+fBFLF#+qvHB=<4zNFc~E`SJ!6TB9vwj({_QKVO&!Z8@0^;S_PWVQXd@S2A$7!BO*ku3^KFsSWoV*!ra%3ulO^2xn7AJ(>fn})rom9Dj+})%H zWS4f66%i9(K4zHL@y6WSw9yeA+CLOIX?TgVD(v>}fT=8D5l1{Lg!9Net?3EG}uW3ZSHb_Z8XB z9kY@u6xUDBKI`E4n96pBvFOi90Rx%og-8&EBJ?lbV~Ky*7zn|4=%(T?+`0o}GGqYy%Jg2FSF=Blelb^{dqlif}MwIE%gt3QcuXht(NDLFnF| z-LN%m_103xAML!@A5N4H8BFT5TH-O@{{x3Wc)!L^;&2f4gD-hGjjLi4RvOoMX-fVC z%E?ji@60@zjn^7-pyc1t2T^(=|12~QCD;5(RyjlK9SERe#Ge z-#O5X2e?22W4f+F?amG2d^zd?fywmCnhBo3)HF}dtfB$#cYzusRh?B0dEZtwad3q) zAn8%dR478J9)cTj9NY*a`|U&{d^-`w2HCim)qc0mm#GV!ZqF)TI_^y9TsV*m_{4#h zKxQh;0ODmjIaTCpCQbwOURt|3*?$Kjo~yR%z?T!{F4Y8})zIYU)hDoBEl!t<5Nvt| zK2J|6BJ`kA6c%yreqCecWl885G2??J&a5)b2wR66VSc9lfo3AQID%Gr>-_BNt0Pqo z!*J3o=O;x5-Y(f;KL6EKq^HTNqvSO}Dlx9#kI=N70FWHj+9??RjBfCMhJSG-e~i59 zTk=zg0N9fPw7}FbR`mQwf&)AMaeZC?o`!swrDd~W8hC;X^l|55fYH^79;)j1>(25W z-9g~%x#H*B5?YBOZ#b&ax>QpaIhE}7`2(5`Y-#k2Q>Ut7p zglT^%+74gluZ912DH^&`QuizhYOcOTVOci%H*Jbx;Mj^mz4@xEI987s)1vuu7~{*( zYjkl{c-{}J%Kh{95dM$rWk;@i*%4cqyU+$p%Vu@D*{1<2uv6N@zuQm!KO(x)^c>$F zVF(T&Gjab`SQY*rw0f}V`tIVt2mwP4m!aAN69O?gm!V7n6qhMf11*0`a~n4jzUx=; 
zNL54x&jFB>53<(wN@Z_lSIQ-hD>WjA=46K)EoW%i`|GzGz&s5;v~kKwRW2F~25bP0 z?ytXYh)&*5=;YNI4bSIyXWzb5cET93O7Y3va>BSFl8H&8B;is|?iQ1`(^ns#|5EMl z)2hnfZ_eM{-F*9!>HdGOj3ib_km-h}n}r`V6wL)O%r=8wZJ+7UEAsIq?xY$r}ft}_oG>s1)N zPOE!@NJJ3P4#eZRb3zj{)Z39s)tet;Ym}C7mh{%hF1+Y+8&#(wOzCF$bxfiKLr;!* zDJP7&D^UsV@)}?euYln1nMrHDIGi>vrljV#BVg{SooRLVQTVmX#NjeN5N+ z1{G&z&?Pj&8RXmWzE10H{xP}*NePp!sa(ZY&L}ZSYw->}LFOVWvU)h*HqX!=ta4cJ z2ztZ)#b$*lW)(XuHX3B!Nwyg2Xwt(4x4Ly! zuS(ao2ms3gxwz27{eSLmRdB9({xfBA?mKr7ah#Jyz3QsmRN&TVlT5 zAiVS+_C<4aj1IXEn2@n-;1M`kmiFgPtKNSi?fB%=Ixm~;DxeT*o#-WiCD#Srtu{}VnR^_f(c!@1vOWhKf<4X9!;Ttxb!qI2(1wf8XVKHpF zgGNz@_+C3~#Q|X6QC-7!`N>qekKQCePNIp1Jq`dF4_Gn-YlaY#F8s@$)7FY8aU6dy zfGKna?3wl8u-y!$f6mKrKA?)S2vG)sIB(+{j2avwc9hF*p{~(5UNmUT81S2sd-NSX zN=Ycua+r$%=a{YoiaDdq7QXX7Ab{Ys;Rh`4WKm*}K-MA-VnPBWuOEw1zPA@_?XedH znV0N1=P|CKAzEwI<6u$<$S7JutTcb&5Y>i>kc(|*`jBoRIom;~DL(lgSkrP>_;xdD zk5xf9Csvvh%t~}oy^)y)(HQ_^>ccxul-9nrEQ`X&sAhaoIxvK0h@IWSU~gIy!A6@r z^&qt%LdmC{Bc_6zMTYp{tQ=!|i-}USNhDzXHsp4B_>_lq5zwc|vqhUK#*lvlz|D4n z{_5z`h?n-`zSs%Q(Q&ZDK!gk#SjI@Fo|`)am4bH3C%yzC@yWkAPfO-5%f@%aR(#eD zBr5-ciGgpyiszx3`o|`Fj3-W#7;!vuNdPm`r@<=Z0B$yy1NeFW+e{mc{1!mbn$u69 zVTlG!ga-HdXPQn+~4@1fm$!apkZAjO&pg`c1F7ABTDq~mrooe;9u`?O z-{$w;)6mgEs#i5yaYzBOtM72ZXD{x~{&R+$g-%#oAQQl}7-f_B`s{!0J33jw$D0Z8 zLO9w-H*`G#x=UQDUrcV#{^_f2BZemcWq=L|Kz?oev^pfGo3OSIcnI0NZVqgO-+~RK zD$u&MM;MG|gZf8IkOWFCMim7`2a$zP+D`O+Oa@r3JX}sl0P_cBjxf<@OKI(B6<=)9 z#;i9n5e!bFE0E;T$OnIcuNm+xs#;Fu5%~PGX%>QcHxasibl?`)9A}MbgKL-oGM_OiQ6lu5H##x_PQU&fy^5ATVcEr2K*J3=_bAN z#Y)IXuvpBdK8(4Jg^yBM?>jU!Fq(O>TV%f1r;O0M$m%pN!dQQ#f!MjXrcMb<%qit% zNbC($$Ev}M$mG;&*GvcoRv?B5;G^vWID2*BFDLG?f-eFaLpuC1!M@SmcZW(Q&em@J8j3orNJfvhE%PwN8 z5g1-hx$j8WpwfRUgbJ(&8ll*XEk`u5wTEaLBD%ewkmXu-!sUSJYCM-GWe>2v>v*BX z6583;^;`PxS^kAqc7wuxjR_NgF>H`{35~a9(PA&8x&VB36w>hEP?_|C?o*~RfEex8 z+R8@hyx7R;DpX4&b0!*WYIth-#jgiWP0&FqjN(5lO<;d#Q<~sLH03DS@km#C;?w1} zT>CHWkPqid`pe8{Jq2%Shx)lZqMYFoV0F<@Pycme=UN@|BfJZtr&I}Ocf08Ps4pM< z2wb^&@vfxzxf0d{H5E@u+Pt*5u_8=r-rvBHV^)@!t}ZgLr*MS;0mzJicjc3qc$grf z)6EG+W;%b0kcTwM8JP)2hN@hI^iFD^furp1ZVx^S2G_yVck|H%rul?%I0D?4ffT$c}}K9 zUWI>kw*{L}fH+Wp;8YNZOb2)?HwiFqdrJL*SwWH{6d-PKV=>f1ayxLWH!itr{A&g% zNu3ji{tC3-cjx#DgPHN*b}W_}47YK+=h|3X{v`?l@{gJ_M#Ln{-K8B> zj5w|s^`}-0fH+meiV;}x-(pQqQQB>^L;-(^9e;A(J5vl&nW#W;lpc%PS2vONc;Uo+ zftuRyvH56`ieY*+VoKup$^NwGg1)=vImsJli4|<$|Eh2TK=((j#GG1Vx{a$Hq7`MM zAS?e*wVVFs_t(b{{S-Z!!+OGhq&3Mz4K@R~G0yi_4;oSi#f7M!9ZyPxh ze$TJaqXgIt?_&r1kO`7Of@Ba(yiYreL0f9aN@U4Ux8h`e{Z_HazC`%|vcTrW&1O?P z>iFubqWSF8jL&|1#?$lHi)YVY3pW#j1!Lsw;%X+OXIiM4HJVB7W*4j3zvma%_2OK4 zF@LdK9&Q%W&kvib-R-IKf5Uavr0;)n!x#U#c>Dad5~Ep_X2M%oG|u;t#+dOK&Wsaz z$e!KjM;PR*!G-PrF%^df6$a+g#``FgE*y3{uyA)^84tl<-o8^9exj2_@Uax3m z_ltVju6Nt?Bc{?%)(tK?I~P7MEd?|*)7sK%ElNsyGjFP!)WYfTe@Tn%+G~2<-O{4_ z3KwbE;#^BPzgUQQo?>4&hfSL%Y0|WdbJ*YwjJn-?FUd!O-ndow3rI0zbtj;!q>V7q zhE$!xs;KwERH#?mse?pwG?ie|bz^@2*SmM6nWSd6*BYbazDdX5ZRxbLbEO>%!i=R} z-QI52HONLuJ8yURe{`e@>}U>`H&we|e=FNlU{Z>SWsbMzn07LcIHO6oyLPh7-Suvh znzS5Cb|L};`ycQ4BLC`r)qHvWM}51gmNiWX?gZ+Mimst?M!iRd3crnp3h%w4LMIq% z`r4#luECd7y~oM63*7NPaAO?^O#SejGa+HSpb!lT$=>KDe?OX-lwiIyy4MRY=YS3c zTaf7Foh|&f|Mjo;QC5FdO|$;AEm|x0jE4a*7*4({gPNP{%bX|Q0}s6cnbPOzHt-|^ zSX@T4QIwCmkU`NlJ>dSijX|yX`9yN1Q zlkHI7*L9VQTU#)0RRBR{ND-(UOUfzOgVQYNp*j5V>chJiFJBei5Sn@D?lxIGtg@_P zLnVYwYag2ECWiDK$8FFBsy)3$?PKGF_MKr|gnQ)+e>fU?6=1~LH!aJ`H_US}_4??P zLX&d}m8Gl+;bQ9!<7uoV0a1gOK4;@7J0inIF zT3Vvo*EIF+&^GH;+G?lDiNs(bFP>nLr^te4*hqukB&*@W5uh60spgl`90tfscjmXH ztys{Ve;sT^B)viWbkdIX)<=+ttYmN?{NCUY^mp+s!mZ8_eVO$bBgHy}sB!FP_rozk z>;DLjC6I;Q|Gsr_3@5$^k%1#}rH&C<3*WITtrH?+C!rp;Pz3k{LL_-dTkGe)r};+_20O|TMsUS^cpy}QUF%7N 
z!5L$O!A2Kh@a1HLAtwDpoJ{=yC(}=iFr>ApaA;(O6HW;Ou9UL`lgQ#Uq)4l~kRq*0 zAq9JVjycA%rEQ^k$KHVfCGUjO zfn|=&`Gi%$?s~jGMO&=sB;0cpX-`>@h4iD>$Qhx44Mn-yR@s~7kx!a<( z?bAGM9v>0CNiTQT^|t?x%Hm@O&kpjvWI~`;--q92vLOR8H#o>F$jFq386nNo$(KGC zRf56>@ejt~zKtZg7;(y9Aeid#e=`Fvo-z9iXyzMcqsAwF;fgmARLvX zh05LC+#cFG{rrk(HK;%urlv)nzT#JM05IgI7D58kJ?S{*SOAJaKAqAlu%$5IUKWTD zc)@pyRXMuAfp11J8e9^%kP{PzU4N^Vu|O6ERSkGqM)AcM^~(Y)Brs3^e>Ek+&>9u3 zmgzI4Phoz&!?1QoFRA_(=3f`aKvsXqb6w%n_uo_HMfzO9<&z{YopG6>EupIb_GLmT zeQ>%52k8_XZF9KU(g$1|pOi_!$Alc%*A+?qIDrU25<5WrUN=QG0Ra*ZS8m1QgL=?v zP`!;Cthcz?6|#A+$yM+Oe-(XQRn2u$yaR^|ZY&$H_IiJ}ZfYe}DHg0!P46qPb3m1-i|T~<>C^fvj;^<7>4=;& zY5W+pQ?L*y5#=OFBz6oBZ!p^Ar~o`|zoO9*L+I@I+MS(`^K!_rf4TfY*zdR-VgJ=A zX30SmqFC8b_wBxJx=bnV^#oCaIHknngG7?8@_^`JQX%VrxuTL@k}Y(MWGhsXZN%Wi zzDJkGC7JbDO7BN~ zE=&dK6O1~ZYMN>*e=H_*NE7Tpk#N#fZ$${g6dBZp7VZH~S5(e5a?CU^tJJXQgomf< zRMtCHd+DB`#HF$F8=jERpXGZJg!O`IG`Mnq0k}RPgC73=dE$OhOFN+&<4HzTqh~CW z%?ufS*1sR9Rsr^D7^alJM`@C700zT)9^VJ>G3^sf_E}BQf2Tb-oPAgf4`&zdhloKv zN(}Z$3@n4JWyZ?m7+VMmkmIbN>p{hC@mv_5}w-8*zpj}A168B^Y;>YP;PK`BE4vPnnPp&J2P0O>F%iXGzyR_1kq7& zG)to3A?48_bBDlltyFV2zdW?mmo7R6a||YEXbjz-e+SX;Ddtr0omnY;CJtZ? za(|tGL;@28F*P?flhFkzf9+dKliEfQzVj<|gsL!^>Bl^h%E9Z}xZ=toarWl8Tv`@8 z*z6+|suwy?1T3nHE1ruuTIxP4MQit9 zQXs1;MX&S8$0*e~zo=LRj+8+!>mbG2w^=3PsF=cPqrht=hSiPA2h-8*O|OrL-D&=@ z2fjO$Rd>suF5?fMe`m|Lre>)VC3e327x!mx^T}lN{-()P8&4Oe<~LVO3c)Bauu%Bv z?Cs>?@=<5qF^h1EV^D`w;WnhKgs)gZzt;yskYlk}q}6dAlp>rt6`U?4NI5WB-zbZA zCN=udq{##YB3O~j8?>=<)<$g}%fO9FsUUv~P9hOqsJut{e;3?LMmJ~zWj$y8N1+c5 zicl2M*NT?B#sxO}S`n6VrUk-sWqV#WA-L|V1%j(_+Rph)CTPh)t~&ph#uypJW`Xtn z$A^-{2bGb#Rm1bcEU6#v1lqoPl~0H9I=PvaqG{s#X8~Bke;=82a~=e%(7H69DGah< z6W7e8T79#me`+;&?OO11a7r7K2))`U?d(SAbsM7BJstFpZYIO~=|;fawQ#q4|FLkp z%8e7yU-?UjRT+d5%(^zd{#emHUT-%}~a%kLc1ujP4_*4Z>vw)~9-Jc5* zJP~_He`@@x&Ltb7W$s0nW`?c>?0cM{4^<>9W3=6Z1g=PhRBZ%~w2iEPj6pJUdq~6t zDTp)tkv_+O<3#b6WH5Cx|DP?o4N-CgXVOqYa$e#5=7}hw-H;O6Jy1e8PD<#uqJ*ok zSRI_q3@WT!p&~rY0qit5MI8*On4v6Kj)%#$e;uEj!;ub7ZY^~o;dIV%q6$%s*te+= ze{0sYDpBOn?YpL3A$IY}LuHrg)%hR4KYM=On4m(+LYNE7UCb;Q5yj7JHcQ4mk2c2) zGek+LM9(1zP&qP3)r8w@m#eI^iBha;`zgiN^t-mGXsZnD_Mm`-DF#UOpa7*72$t%P ze?aDTp88!Hj)eh~aUl)IszvPJ9BaE9axE{VA_*vKNo}a{mc{9JNWE9my8P~L{JA_G z#Me>c>o$^P5954#dtZ*&(yka+?ENiD4>0n9o_il;TXf4z5;|xxQ6hPTRH{jV#JT&5?bCUqtYtbxbc#i3 z8aRSi=G*s>z%AfX6}m?0Wc%;uqJ(lHsOhw`d#Ih=Jx$gL_IE*ujt_}g6U5j1wB00;D`Ui(1rjJ>E4z&m zqC-}ZYK%Y$8T$jq`N{4rg76*;lxE|j7Pr&J$G7RQ#|$>1ex4FBcF?TQ9$_zNbAa%_ z*v`{rTg3sVWZS36Cz{b*Qf8I=;# zf97~H)zv|81^O1BI2ST?3*xsYR0*GMQWT#8Ek5`Ct>kvPD#1!WGPa+aGIVgUwO2~K zY+8yH7up(<=H9r@#pq83AeKKX7l4uEn1!q_1w)q&Ergn|py&(Uy2Lw|q1poz12Hf$ zld(Z34mLFkFHB`_XLM*FF*r1rVR{2Ae@%}Yxe>kVSMZozD6&3SEWlV`Kj0)l0xZ@( zBpLW%dxgM=F%yi(z~9}P>r(5m8Ja-qaZ$$O6;{3(pNYmGlTe_E6( zS&XY#P-@6WLt{X!pk;_bLKQkz!hmKa=7&fxp+xmmU7|%LQRy*)BoUD(Zup4|x;Om7 zRHLt=)rNdeBsSz*BK1&&F~GR4xS`W*h#BlaqmvVPXjmC$C>F+HOip#Qpgy%&f-Nz7 zOw7dS9H1-U$ z$aXgXPqy}wKvEn_04cGd2yhS&ZD1eq0jqe@!ZDp10Bu7Ii1Jth0IA4mX;SSJEsQXj zgMp};@rtoE?LdINp)nMPebk5IaE(@mqrfUH_KTLL7LZFRsc~Re8=*CZe_|idJHT&n zDD}3I0{JGfIDE3T;YzS&Gmim=NqqPdV6+1L1g>bcgvl5UjSawOt$OGIJdKCRft|^) zkzv3Vdq882L#2)pHoOa7ZGB9Kp?ypT3tAu3!HlqO4DA@|eGDyN0EcpT?3H2(uc^Zg zY zAYuoZe@g4W|Kp!Qy4-;yrpyF3Uw?f6{?+Q6Z-(^qo9nyu>{(hrhYM)CANCxEH(-7L z05j0;L4)J%{sGp|Uwgv7+a655{_*za?TgJ_x=QOGe|VnOFE_v3f2B|BzWnchHt2Z$ zugz-xJ#^Sy-+c&8HO8;jKW#qTe7t?T`LGQysRQR9c(o21A|g1p81wbb@{NSilKtnQ(%B zffLx0P9VMHihzgko;yO7xaEk%2r{`>ndJoj0I1>vs7IW@e}A9ct`pp@7jU~S%k82W zyW~!}K~v-nSzsJ?&BQtf)p{1S>si#El4g}`?YHd%#D_jW^)w%-q=!DR-Oxz?whby@ zg6v9`ITGM|va6I#YEQX^W`F1w7wSTxOXO%gbP}W-N{XOEPwP?3a*7buT#!>z!@R*2 
zfl+WuBT>%Af5#)>Lw$Ot1H$Y)?X)nsC;cR0rlgS_=SbeZ%vm^}BR;_qo6iwyOEQ8h zWz5LH#ZV?Nc?N;G0{SxR@S}cCn56$lVp~R8yId4xIYgLgKEhm=L^u)|M-3u~c@*Vf z>p99BL6+P5ER2~?WcIJ99p~5s1U>_I4=7aGwx!KNf4t;Afm%BcwLPQQIgWoQ{aCo0 zlI9L+81Bo^WVTslf(&v28Km520ZC!JCRq7Ij+Pgh5ML0)Lde};2GwV_Jua7UaEvb}gig!#Ia8Vl#R zvJO&eynBEWGTuQ%gq9OIaagITopVO$S*gpEbd=lD^h9ZL#KitvE9=@sp3`VPsoJ9H z4aMzcV#G?17k)_jO_P-1QJs_VH$Eq7wuvl)1?f^fnnfIsfX7RMy~^QOiaQ_RiKyAm ze~g-YMmdXQ&Av(^wuGT<;u6s6d?r}=5}8(wm>Gx3eWrzk?$Pr1shDYW4kPTW+pQ^S zMuFn$zn`qbCir6)WF59>j#Jbg-9t!1H8rKPqU9}t`-O_8 z{?;cVgSYdx{xCB1U5)O$!o+uli7y&7q*-q9RTPQ>Lb4}avW68KGSuQ{CRP{L0!BIB zEi@!`PH`RRxfq@ao^JG2bs^GhK#-kse4s_lxs6Yq=xAMX_xjEIO|Amrrp67=Mq( z|6U0gc_I=aILnMqthJ1Dl`K|QKmW>;C479FaHf2gd=8?kL>bSF#2;44`>UU>Ufx{& z?K_zzpuux5lAAlwW?80WVvS`=_~d4p{G8UsLp7TV?roYc7Q5B#*PFNSL*M_QlT1qk zs>3f{fw;07MiL#Yp}??ql)8F-)ANnY9OD8u zBbfvj9dQjvD`vDm>>24L^I10ZOupEboAqq2luEbxrM#rYS$HpYEZHFKmJvbE)`h$&?ec{CqZ-T1gm#rAi+T>>`So#D6jGc#zP{ z!J>1)tg<9kew@+-5tAU;lRHh@2t^q)-1W7I!vFwb#qD_j=v*f=mI3lcuA3{PIgTQ; z^lqg;W|M@S8h3(C6PN%U#$=bmgwQN=`kTT8KkR?>p|}tvxL^t#@+?RkHpzI$q&*7} zsDfK@Aw(FkOF2jTdJNUAfq&g->Au}sVKN2vU0ntk#wn;3*|Y=u@DKzK#gAELJ#$u_ zhY!$UtiBXJKs3w5AA=9~OnkWiN83SsxUba>V+_;{dl@n@aNx42ifu8qTQ#6V=j=2* z%KlD&;fCKr$%5D|@?BF9bJ^TYu@(a!P<_(!?)8uVdhzP@)Jhh1=zo1H466iSCv&AC zzoY_N!AItl%s3JgU^CL5*Go|cKd$4H%uv9uvQaN>xA~XOj3f04h!Ha5%T!JQEK8Yb z2hOEY=9J0_%*LGw4^Uy;#TqLIs+GAv1|EM>!euxQy!j&V&>k3<%1(jDVaqfO(w)~> zQ3rRIYOEU0hssYC$A51EB!@FqI~ADpWk6zqeQ9ybjT2BgdTP@UL+8%p=%~ZEOLZEp zfkJro&0^(9+G-QX^+lN2z>ODTV)mI#%>D_OnD(cq$&t_i{gKX}mnNWtpMJL?9Nr#! zVnq9SRXcF8Gqkm1~wOp9H+tf5!tY_ey zZCw@(O>B1Crd&pS+xr$eqHl_{f(hgISENB^Xf6~Np?i_yQ;P7;r@Q!kLlTDfB@E-d zTj(ECIt-J#Tj=eSDVBL{2cQX?Yj(FYk*;#s{1-NxTW8bvyW2S-3X{o74T#bVvNPV1 zMh_}!)BC)^2Y;8YH#AZF!i6dpTY3vusWNylz+Vnr%>vxo8}LLp@PkJfGs&MuR(gmm z4P{mBnr)qf;oAUan7iMg-p|o{eux>*xSYP5ec~l>we8%Lua{E_B@|n}zwG_;@;C6} zVV#+ZsHR#}1{c+2c7UMtLkLnfbG%ZP(f6^WbhuPp1bt7k_Yo2>80JvBmL%y(G&QcORjoQvwxkOa>JZbt{0DM zc+W(7yW36$#&c$re>N_A>AT3+v{DF%gK*rTuN}Gj-?T9ryD7FwwE-}eyhFw;eLw95?KXb!fv;Wiyqciwq) z^FV)%kyfT>ZIfk#5_DiOdR#y##Q@P>=pqf7%334bRbU>dIFEiKa+^+2EknN9j~4HTE&vQi zx$y$U8sWE$3`*xs@zmh&`_?k=XM!not?*7+iYQ0%VD#?xrfC0E4=RzN|M} z@Sw376pXUzx;brN6VoPdwz1LxwFTtipxAepUw8u;0X~;_cmo}OdlDGXkLlip*wurc z@^16_Naf9ZxR3ZMzn4Q|_G(joO=-4gErF92b&h_);+ZK*83_?Y^%Ia6xO_UEiuM55jvB2k(%x39X}`+Q4_QE`Qp_AM&f zLh!)5Z9C#UNaFEFiYg(D(D=$l(Ogg2>41suk1YodHe!E&d}5^XNJOOr+HOxs7>uaf zM>KTde(+#988;Mzr+UGImk>M{N5NARJfZdu6JA3$kATueG*qdO15=K|zi18v`sOez z*4T5@IG{EL6s+t8v`0H*>?B=L;monvi3|?&Vtlea8oa?J%Up=qhaj(=iGR=(oDala z-(39<2~%YOB$uJu0~D7)mID+7F*r9imqGpkDVJ-4158~UW&k9UlFd#o@RAZE#Y{;- zn9#G6#q6I%5_}5t6oAR(_%6S`Sr+quPd>bO2fF_;ou!NsP~o$at74u?YLZ2HN`V*a zVs-AG=;UmkGLsaWIsUz!Go2L0%FigXmlT5oB!AHBX3q87uhSDMO=P=#S$luV<#JnZ z*7>Gbcy~(o?JVU~5oOdY1wjPb;lA3~FmNN1i)y*7th2Y}>e4?dd@#v8RXFcawW`ZS zvG(u6cQRq=nT^wZ%KVG#e6ud^ytTvKV=5z*HRc}0974-D)yv z7J^U{Mt|Af+#u+6Qq@HaltPjWL?4uY9e;oR_~z(1wuA;EHSfccGlg5WEuPy;J%xOQ z;mdS?kuOJ|KF3zTO#{&56)y5+ZNVzIN5=a3@F&NR_yYZ?F=|RRZB93;x5GA@Sip_xt(9*Q28uNHC$tjhT#z2QfUeGpl zucEYoqM)@Ga3-*1V455o0WL$F%bnH8`p*wDOg-8#hLHU&UM3xv@5}@gJhDv|#J>~c(L$qmm5#`WgR6= z5Qb2YIDx^;hJD3X?B`7NXwHVw{_fl*K7+;4rEgs7Jf3iI;1zLdpJf~>Ss+E3uo@ zr@*b`a+nHsX~9ME!v&Y00y&RUlJdm_m+jD^WKW067Re?AFsy1H3yH@0AT>)Lw7IVB30 z8YC39PQG`r^&Fad4z|oyZkxabO4~pg8z}qW`&`=QVC%X8;66_F#qGM>Y%q-O<}5$A zAAa*=N)Os98Yhiqu`B9RWF(Xtslo%LW8lp#>jtUFkCH!VD!;p90Us5|DtLRbuC9IU3vuv`v_&j}5yRy1i1$4_ z;^>`tq?k@p?!%*kc1Vu`Mh6Q9i%uVWE{xAze+*vJiQ@&OTZu>-t%-F;v0+61@Fxf8 zwo)iO8vGy6>svQ=W$eBGoK_z9{?lU$aB?k3dZx~LU3x<9P>y|)uoom_ zECljm<9Z{eLib}ca3WR$_G*9eL`_MoO&Uxz)oaQ9t(M-|YH89Pu*3_zSJD4hWKzc1 
zL|247$oS*BqUQX^C1Tf)sKD+I&Tuk7f9aAPdKUO3?TCoBBeBCk;B4wNPzb_k_KQz) z?6#ZGT?BghbG1Z4AernM&S69)M;Q-~>#^g|my(Z;Vup@LLc4>yS5aH~Xp5e_AoX;o z^B#`Yw?>y@0k_grYVR(-6Sml1E(oqS; zwyWIzUUcqtUzY6%9OfvI3=UQ`!-zc(Qo~5k5B=buZ5t|Uk68SD*XKGNI%ydNB0(Q` z7G%d}10C4@N;hLjboSgEs3KsR591WS8||p$lV|?{PlJa9m!aAN6ag}q(1`<&0+KJc zVTuE40e_}Pw1_2(V=<7q4-*L((;-S&66uKqOlD+pG?ai3^RtBIl?7 zo!iVdq(y-RpmBm)YkEO&U^cBemtd{en#I- z(;gkFGBb!rOaskEBcI_J1Tgofb4Hs>4E&leT?~*9d9iJ%F}L4c+O)RC4hNphYbpXM zPDR!NC)#>y3%9g0s(5HhH!#|%rk%oBkkfHj_?63|Lg8>NZoVeJEl^Mdp2hx!o5U|{ z@_(L6+luBXtCkv$BzfC>ci)etA*Zy3>niPPb~xNFLmA}Dzqub#V11%z#FcY56IKjW zu+US*v;skj`xUH!3@?FZ=azc%?%U9puE4bH=sQ7>2oVH$a!+W0> z_r5b^pHD>vD7KyFrWb%b`PMKtj|}s|XMez3Q0r8?4#su@;6@U5qUaUZcqrchLLGjY zr5vY5$f+KD-~zOO>cQm2yFdr8uYZFN|I4uQ^E2P#Nyvh*{~3|gEL-{xvzCBc3Fxd5 zZTtWSK+~UTO(?wf%?X?5J$bxSK`{t;H^dUhhm$emx}WNW9~fb=9>nhsS&f$`!+&L# z8ntwwn!5uZxp@fd^k;ka_&Nl7IB))+%tr$y!K&zor+#{JrN5e<>D=br{_pbk;GHLA zk=s+_WgVtPKab_mI?ozZA2ROL8E}mN%_t{ohluZ2cd-+PxoZq`Z!R}&S{n=pqhane zI$@IcJoIs9QkihiaR?RGC@5Twp?~`|qLY%#F*2d0V$2)p>7X~GssT7KOR_QrB;Ojv zPf(7nxt)$cRSmTlHUqu*^3yr5jafAu=PVwHBK%O*SfV>6rISvT8J(`-x^|?vp=Tfs zP#Z%KUT$awChCS%u(2H#aZf&W(qP4c>2VncHSb4EfzRx*O+R8aYkW&2;D4hyuK0(a zW5JE;Mj$kLrIBIbk_06Hnf*q8_9G?Oi%L$&@0B9Brl5CCifZrGA`#=xo_ic{5$;LF zw)qix1YA*LBty%ow!E4(jqN#Pb3o@n?AZ)7%PBIODUIo9)<;LdcUc@9kjF2Jf7NA` zGjgq+`taTs7cAt_W9SeB@_#F4`L?^37pKufK^fgO$YEwH76zok^XoAqcB1dQc5u=o z$K5Yxl#wGknu>t-HwKGjf(`)e!e9wjn>IVq2;&ZV`3%50QDX<^_M8`6F9`)poG1`e z$4eS+ipKVXh0kO(Z^0j-4yV$=msP-BLX4?mm{yB~m=;R#PgbLIf`5%@qvCgq{3ylB%+z^Forx9tmJOawcXRM-!T2?6owhTv#J1jfKL?g1AtYb z7%q%evoIFY?bfzrcT}*x6Dmk*Fjo;;-vSHHBM4pKYohA|!2&72oMzWF+o8fu-{B6j z-;GA7ftoRB!e`Ce@_&5*=3uPH`nowENi%Cy(fIB53k`QTQB@nyL!%GVWZj4qIeIeO z9S=|_3{aYO6E4E00VX%p<%^VFbi2lwNz5&rN$@NKfRuO=i0PaPkE#HFR5)z#hcZ~=2twNa zn!*_vV)J>0F-z2CpCn>Q63_S~*IldxJt!Vp?vh+nfJlSCU+xRPZ4C~oh5X>1T$^Qw*s_((E16?m8u%}~>vX@*MU1z>%4i6N zI1q9!*?;v3+=`2f6QW5gH(1)%)yb|(zU%{n3Z|s|LxR3hf{;0`Nu5|tW^R=Tf57RW}d&>F-M$E-crsHlzFn#>#id$7MY=`s7W z_vf^>L%^_0F`q+~cL5JKxmC;;lY$~imOJEcEPpil@r1^o&2O1DQ1a*FsWrJ~YQQD2 zaBi~Q+1iRwHO5Usanybc9Ko1~38{dFQ8N4QATT!h3mTOD%r*+<$vw7JWYG8t)Yr3%=;QGSP_irqB6N{9 z@P(bH8aiF(16p$Xn*%0q7A^!dmye`7gi{Ib?Z+mNf2o!<(DK6Z)~e`uJ4!!MmlJ)6$gy+P!H zt_@9W+-3L#GZiwsv~^fcvlNn?QDDANo8idrm;x>y(5FXyn=@_(HckMj{= zWe`U{LP^KKNc?+bGcAwFhwo27M`m?zg*Mg>MH7-52L_!rR__jZ&k+Ojv(v$tzu$Dm zCD^Oxjar`(hYCJV!i>WMGt+8kn+pr+@uGy?A+v34qe(o#7i}#vw3M_FRcv-orl9h> zNpT=6n(6BFn8Gh63nDSRw14p@+Y@lVTq@G98wKOe|MIP_mq)_E`|cS>z;rZW_hViZ zWGW9#{-~D%Z0t;0x*#Kzh>YgpoEWT#6%gNTVO}JdV6eSF`T%?P57}DUmuW9jxU`eG zjsC?pm^7eJ5`j;aSO^ZY*(?vlXw=J_tN#M_4=jiZWo~41baG{3ZI`Rs0}7Y#p92&G zGBhzbm%(=dDSuf@bK|%XzR$1Fkxncu@gl(E%+$)UQ>3YKV@Ws35W)Y{Ik zPd6IiLj%d4tz1M9XaIgdfxk8y-Cw_;e6?HUs~_Lx;rZd_-4CC+S#i!vt;Oo*ZpDRV zipy1@6%)#=ZnmpmvzzLgtV~^babMw;=pLIExM_9`WD?wZ@$&b$;zV^pk*Z1|V zCHkfOrevkjk^DNvaxHDaVAIu_E2dNtICmeuo@ZNVri6%WUAs-nDwrwwC@L`g%h$dd zJ~h?8YKIit5|n=}$1fO?jK%@+j9@Y^R-i#9OG`xjgzDF|D)a39)mj;q_0{jks@+ud zcDTc*&gfVm1Db8quB11)b4WTUXq9>V;d=C}3TOZ=pwl}9%WRoBvNWaF9!C0l_~ zOC6P~N$47RE1BK7ez-<*o(=B70|=*`Ulv(Y4?|PU=VX8OTh7IAxz0KKa3UM(eIR-y z$vc`42kc&Gag-lesA|iu*2XHrGEzSw@uBOzVL+D)G;CZCWU}O8Gg%5i?;gay?LU*&XTpre$1^sEU5c0BtSClIr}4>3YiF!D$u{W+4-oYO zr+B%tJVSp=nuW=g}Wo`Sxk;3SZ z!W166i$i*X;~sx+N8E`Jhw!=7xd3OZ+ievv3Zt+?{3eALBOxNb<_cVkDWt9yB$}TO z#5I5T`~$tARlHnu<{)iT-31v&$tcs|_n5HA%#di*X_)f?QX)2DfRwmj@+d|#&edh} zc zk-&+|2xpYg#(+CgR78LuTCzS(#BXzmG+lq?wl9J1xehUTkx8N_p;D~~Y^|KB8fLw$ z@N~4ccd*^_HAXRrKpjoG?Y1YXwtAw$448rYfoQ`nV!n&?q525N1!9f@@obmMA^u}g z>a2H=LJ|;A7J^?f78MX;_9+-hsKo3DgF}qzZa8!p>G60u&u)(c?KZ;+zWQ|T=c|9F 
z8bH<{6#4TDy(~Qw*?He6mCx9W0wHEjk9{jyE|$nAO%wR*h){xOfw2y<=73cnz8-0_ zx}n$LG~Ur@##?c|;sFVww-{|t&>b)`2e(Xu`xjtP@9V~OP0W5m7YG%h=9m|<4A8{U zvWV!37-d0}Qe41^3EQ5n@o2;Ez1Dx@cW|J6*cQ!o4&$eu9^D*}8)iIkF|@Q~Fc?Oy zyQV+TD4I?hFkM^@=Oi5X*x}mK54#wX9oVdSNnx9gg;^qpL|Cn>Q5`C%WArDbAaMs;{5D`E@Gp>JXIi3-82_6JB5r!;=U#Adq@x#(6oLzxC1j zfXIzWAIS(1nluh|$XS?fpd3M0626e+B7+{zpQH?}0uDwy`0-daLz_`~^+WLS!4_sl z3**b&L>R5^2C8BqTf11M;TwNxXPv@mRGIeyJwYQ>^dk;t<$SFBLd#cM7%^{QQj8SSQ$^;IT|2XOOoV@bP4-hcvuY&i z(@bDNi-=8)P}lq`b%kU5s&~mY97=A57%pBD0cN_~bR^n71fL zTx6q}o{CnAB|;hRQV7pNM6|ZF01=@%hWc5Bj2=Ir(VfqfDkNev1`HgBAW-`*37JFv z-6cz@wKBf!UnuijQ7wPP*g>3Y-dV21+Z*ont#`?F(u=^##cPczpRSE&9qy73>?lXM zAI==DC}g6;g@A93kfEIlE>iz#mXYNdAru28~%$<{n+wap^pAKM!H3^kL(tvCk z3n$QC$Qm%B^Ft8ViWRC{Vy^1@z@>IggE=KP8QzV$j>bN1%~ zR%-pD>+qsT6#g;<-u$gFyq&w_f!M^XHg7*PBYt06dJf)}em_Xf*D1gh00+TeiR`%$ zuce16Mn0sqsmp&6ysoRFd~p!2f~;?-VPbpO*R30qI+9e~zCW!lgyc-Blk&4jmKR

v$=$yB<{i31IfX>8YS;iPb$_#Bi+Q0@o@m_~HNQ8%a1oDJR`c5( zA+j%NRD+iD1=oLn`q$szfBckU3zZdTMQ*;mwv!SXgx-JGp)fj!%>ol+@h6Uej_DiD ziZ+PBqYLlnLqG{N#%tF{e7axu_i{&HoC4da)`pj%o8A0@Q{q**DxhM9O7eb;c2cu4 zPu%NEUIyl4T@EvSGb>;_nmcEIo@>VCpZ|{s%54rip|Eoi4y_MLFv4CVNa~vncvjC2 ze5#6qrHg+GZb7j8F-t+dBl()zwTo+6L3f5XOGyXxD%LUb^n|J>M zwhscD&6lCt0~7%?moZxc6#+Pxk*xzMf2~(bbK5o$zWZ13R%#+K#DgT*nH*%dapFlP zZKauZCdq*&VT&~%E=k3ye|7U!kWmiWVR@o{_QtymI7QHoIp zQpRM3{$wPyJt*-X%3;^;2b$z<)0c(r%I20B!%h>C6`vLnOeXEwt?r56-q6^z&FZ>p z7b`61M&@qPQ4_atnu@Z|_kFJme=}9)!p73@)W(g@hq5&$!ZlIA!eFtIBK0qvfo36b z4y1!1ml3zhx?4@0esVa@c>WTeI0_MuMId92qwSFwAHpbny}ZJ}UaV=uX-p##Fd;_y zhn?>9oL(UUbr8Fl(}eqk$tYptX%MiRRkspx7|2weI_PDmb)RRI=5zXEe+iO}2aK5G zD)Dc+wkFDk)(}TfBgRErRippS{QXa_82h-yB#r!b!TcM;hj+_Iw?oiSmsMqUS?D<% zPOQE*$N*kBqdiQGdyIC(*PxO3*d|DD)N0wl*&{5_UE2c^B2E3@r!90N>>z;WL)-PU zfmdk|C-LZ>*Dt!PDeiSwe;Wie%P0y%1A;lnuExD5*TVMMb9f+<2?$qDj!D9=AH$fb z8fx77E&UUIXCVt?<7iT2A}D=hh6MrW9$|+&fk61wu*3l^Sa#t6CDambAsR8jo=3fM zH6~U;9F6#l=q$u%`4yXa^xsnM5UC-%vyn9kBbcue8heepRXC8gl2c?Vx2d3fTQ9+3 zp{b*YBW_WS2llipf4f=D72?6}Rm?|skxkj}sM>;&RTUA=fToz1jvUex$3_L?aVs-A znu3T<5Bm<49Cx%jxNl&o`+YUcT7{M(VsR8?nrFQmX+X=0U}ss(OZTqk8JbXUyn|Ls zF$NcTQZ}yV84mY{vUY2@5%ZGdyOD2hyor0Hs8Pp;%7MUFyTE?1u@%~?tyUF^?SC8gtApFtx;CdTDCjlX5JTY0zn zgII_Nl*db&a}7ne_z2!v)zTN!M2fe@UAH2Nyi*%cd6ETo*GuJIW9`~Ofl6G@bv|x!I zv~zT;wTeub($bY_?N6#BtRl?ow5@95B_hCrCB`^jYatAii3-+IGGV>!&2>)1CYcgT z&A3$)SpFg53w%w&BpB&QcU_saMOA__f6|6UgKHGFAjKVg$jiJgGI}V}gFoJZ%0#$= zf43_chZP+(U=}QSR=njx`wx2|&BG<#*Rf&>?sP5az6KLs=NVvg-V(tTlks46T5cDi zQ7jT-3eAhOt&5N6`6e;bYWRY!);7}NW+g&IMe_+N_&})O4j7X0Y2DCCQ6hi5e?h~v zG;JCG3%k9E~n4%bhfUGo6eScdPrmOk)bazZ-}bI@%7{FI~c8X`U3FP?Nw zzNOvmb?)VE^Cqi{_iLvypQ~DwAArcBX($SnyQYJil>zUrixkBHR9cwuZ{i7Ww?&h6 zO*4GHgRk?n?drUtgZ4T_o2>1Ff5jbEboAaCSGCvlc!Of|xPW6M zY`-U?x+Ci%>;EG?7pBHRbef_eWUn()#M$u?lLr5GHc9_0Px&O z?j-`K_f}fx-V%%!CNKide_}C)_p@>bzF?%81p7MChKHAzbbkBuA#MKsg&rQ!l4e2W7l3*U3WKL}-H#L}qI%Q2f0Am4({e}aAFrN2 z{|X(h3XIi1j}vHtsj?Hh=W0yQAN2VA$eq${wCB)2k^E{ZxH_UK(M)QeGw`YPe&T&( zdgSUZZt@a3i2PfD??AAPg zx3=6)?w*>K1=LLW-0Wvxl${9$|)FD#9GDdE6&_JgIzt zJEBPG@ushS^#d|ofaNiym03@bf?HM}st)TvcP_Eq zRK!~M>f;Rw$|XWilg61ggNvafDM}La;_)gL;pX|{D|*g9e`fiy^(wd~otqOJ7Ss}v zU=bgf1N{>A2DWO}=OFLW6wtdv~Ba+ zpTc!4>8v8AqrnxrC6z4`V{d@Xrxf8cX#I+;Xh{&Xr#ejcU9SnG)TY>aExbBEps*@ogg|1zJ^IK#gN~@mh1?*-cMwT|M8wp5`^i4RDEf1UmsybJ;=lZezbj+7zD`*J$pdLnE z&vY#n$7qI##el}AnIkiwg`oL0y7x3QPQiedoW$;8e>g%5Hwtf;GY<3)131@dL$}Zl z)1C?&Kj6tZWb~3gkY{017CPr)diZ=6Cg{;dXQu=u`xK_eq$f`?T3f689|Zs`t(k;d znYkFwg|4P53&ylLw#imvgfH~lRzd1By&|{XCNX#OWCZnki3v;#da(Jh!~Qo3<(P;upIhZ;VcfxGiE!Va`nwe>VbN(-g0yw;6R8Vq{0Szj z_T9n%pfkpYrz)Uv$kQ!7pr7Fv2mC<<2m8EX8?&+wP51P z!rF&+cM;GO6%mn0ElJr)e|^uHGrWqDQy>p5`e1WrIGpp%nREHhhy|Yl7QDM){`qQo zar0L4fb-C5ErR75nk)=-WofcuA(p&92Rr)f8JHE zaMHz_<;5QtILZQ8Jc>grtXokj6QsL~Uw>!83SRyguu$4Kcyd#BfebYl(Aoy~7k?*o zV{tqdum&Ha`;atM{^eM(;5rmM`YwV6kHbjBW5LOOzy11x)H2qhRL~TyTHL(LYZ>W9 zLXSg{RDPt~!;>$vDrw4!hMICTe|gvMk4+A6tY`qh;OMl7L&?Os{laumPHe5;@pRqs zqC(5SC>65viz*3Ine5cqF2>=>XBQnH#TV@Yh)oAo7ObCvMy86ZaQN&jG;sv$ctNd1 zXte$w3Ow~B)8c^*tr5nVw17=>9T?a=XQp$ck9Wly>_7|cdftYt(rj^ae?#N1|MDTJ zKmV0FFX@q_>0w9B- zRPTWV3M5(!X4|67i~XUwDGv=iXk<5-zHUKOS$)_x^_={)Eb69L#ZVe5fVSAHrQA1p zStQ$5Sp9h-s#VINIMmt7f1_N}^QT|lzyAgduX6O@pZ5c$!=lm*_C1C%IF}B`KVQ00 zI;Z6+?N22CG8A0yaVybCY8y$g*LVx^)-xPxHW@Xt0@RuPr;5J?!6p{H;B{WLbKQx* zxV+}DvC(1Qj)a(5AwOhwTIKt03oGbPG`pHiIXc)~XBCU2SPf-ge~(PKw{Pzee;!5I zo40zD1fq~b2)Gu;<-CdYdk=NV0FuryX)y_*(+;x%>ib&8(uH_`!f&8vi z3w;S%;uLDQ8pf2le{E|UG8pln<^_rQaC;f^#q#~_E9z%o(rn*&2xyYn80%mL7LRxs zv2JmgKc{yMP1ynP`z#BWi<$NZu_6^oP>qID9jOQvFYG|ssQ;SAa6|ZXps}$}nkK8< 
zsKC>hgDoLb0gLdEZs$;&koU|Xq^0k6fHch;>OEd+0KYx7e@Gy-*7+(M!4X3^@)R5! zhuW&Ma4clRp2$>QN>MgmZCZfZ%*3*OV_arwldS;1R?%YV4%SGD4A@iyR&xYP$uP3= zEG$U<%65v}C3rqxNyY;}j2B*DQzb=B)>OJ!kD>vdf?zcTH+Ct9b`G9kaEP$gZ-<9! zBW*<@v{Fxuf6QU`7M~_{gD^!LqjwZya z*fs#+*vR=nM@mT25xzWgYC!X4B>PY?Z%pm`O;S%F)XA<@lK?fvkyn$RQ4^exG7DS+ zpC?pfxC?q-;J?mjCboL0nRZRrIDpUl=nTChOgP6qweotdL=Y?r zjqLRG+V6mm^%e2XJvVc!sL-e`vwWCagNrcIofh&r4Md^sLg~vnDX@kT`YcqKBBQPl zL`}>tL|ON|jY!??+2=1ALnUX$B+B6F3**^9>-BtrP7X)p5)Qn+hd=~8j5+`^bW8Fm z)L<>6e+At+DsC4KhkV;y&wD>6L@Ndb@5j3z-u$$@TmIvtpUy_A1)?9F-!PXDE5sm` z^#)cG9!#@k;lRa2c5pOHQgn$;hZ*)xu-(AeUuK|jYeF5Hv)JNL#ZiB#o}ePZ2(6tY zOP3rJ(1XWIJPGs|>;c7IAGXwf0*ITepiWxue-PqBUSPmeKq#K_W<&3qOKP<*_l?|3`N_m8rNk1HPdYhI_}BgoSwj84oF| z=X@aoRw?Ar7fy0uAcNh+gqB|A6}Tp~eX7f>Z>Rc*bN!Qq-T_vD1e zoTCu4pFz`wFCv7)I8eKMt`>Le33+YMe`nn+N1e`4SOrmInzfE8W9LSw$T<=P_G|dF z%u^jP&kixtg)3}YIxNS@=OIJ=6$}0C!t%Ojxg>PLA}I(m9-3%UR6XWs-8}OA0UCX= ziWdYJm@xv*!4wiSE1pS?R*fqfkl-cm50w{LZ<6h{#B+b5ffTZeC&4}YHDnTW(7(=dNeP1eN-@$L@l_ck5v;CPnJbbU-Lxc1J%HEqqyl7A>LFst?=!(6*miZdJTysaY@gX6HW?cVEMlE?ZVt#Sc0lt z+P>;|Icq{d4|IP6L;)TPe}4Z%+EDBnpnZkpdj5K+t>o99L18TuPbgH=CXV*@8pC;6 zO)23swWKr*`K8MWhA-aXsq`iQBLnQjPvHZ9{LtE0E_Xb_ao04G&}SWUhM_5Q#udtT zrR+3G98zhb%YY8V@CC4DrNw|XD=oV_jN`-Xa|{*$#FT&}%?7Dae=&^q8AH*x$-6oh z1g%OR45;fo1RGWM$02tWCSXenA0QOSS9%;ErUnuDS|SFG`ZwUnJO73%9eCFa6yOMi zwt8_})3k&h^c&UHJfQ_8%lMmh5vx%1jsU05wL!u|cfSRY=+`MHziw}-fuX4BQ98>p zN0kDOk?TNE?QO;!f7^_A8PmEI%=h*Y_x9n+DKjdkU>=oIFx;0@Gts0kNfh&-6pM~i z0=zt`S4?CUfAcH~$NcpInh##{H1wIfFJPX6x>kbo9xt_8B>6UZ*k&}5@)5jm3%;v4 zz!aHh>c8&Cp@7HQUEEx2yvS4Z9cN}V>&^1w-!b7m5(;H*m!#SQ5tmcV0~C|_>nwk* zSy_|YHWGfHU%^KzWnlmi015AUt311o%PD7%jFNpwQYj7vS=>mZmd70V_1i#$r#SLB zn+F>Qp+=uycVlSqDWJjoE9%Fa&DHhS5et|gkx)F?>;lFpNu&sr6oiX7*ldFzLrEB4 z{k-|+`fH`fhlL-+!4^J# z6HpTABzSN~j{#iB1k4V>?bW}Kx9N2(k-%F#<*haclQplAy{RlBB2uZ2z4UZCJgwGI zEW;EB{3*`M6O1f0Roavl5@RuFgJ>xou{C1}kki4MbHbFreX}=sSnBa%E(m{B(MU1B z_`4PeD@g=r-Pv7RWKCWczztVnUfVGa8@R@8d@_Zp@(dS6di2{n;296D;J=3zPr?#6 zYLE*CljT2ACj1Y(OX}B4%&!woL?S?9G+8gh4^-jTR}!4N%2RunQ<4n-jRfKAHm|d`uKR!A?tn{^Hf?2U z+-UZx6-cQrR@`QZTX#;fvf1PE-YUdYwe=#ajyQ2zSm3qJkEeqfdF+BqsF=jRCt5H* zl+zz^;6rB+Hw7agL;>6+#HR*#x&THrh+;x{;?ZbMc1n}WD3FSggk2&TB_O3xJ;HUg z7>sP0=D?@@&=H6fQCxoql?isAVGXg|l8G;i8zlg4B0A&koT$*6F!<&Fufww}G>Uf) z3ScsjA|{{*XY1R4zWeLzC6^GKXx`(=^N7qOf3c5A*85oLqrOT#E=S@yWQ!%?dII~m z?vj6;mad5bEJ4QeYaE!{TPlMS^SJi71sO1&p_VCJ5Mm%S$Aq;S)iH!A>fwRhEI1dpT(ce{hxGiVJ;q?8=e$RYi@-m2v6eUWk z%i+m*>;U4W7$<)KB_ehaM;arzUL^x+lG%adKTuYQGr*?)b;cFv2}+9PhCQ$T-K?mw!5CoBa0~Mc;F^4 zPHl5tw#~_wM|S6abbQxMpz3H3O}%t~25{zoI={^dilWf|!t!K+IXw&~W>w*}T& zj>{d6fB*jD#~1K$lNV|A1UNn@7l=k`B=?k!k^Xx=etY9~(H_ln+8^IM&LIAhbLl(e zu&c)Viidxk@s>*oc47*sw#}DnzJ-fbGV;1zUUg@k$zQJ^i2!3`Yqy`d?P`=Vxo`n&1zA+zqtzg)N^Yq?wL`yvm=8~c|8}~k=tR89tX`3Q4G)h zX_kg(1%xZDpul$N^-CJBglH|t3CCI(WYI2(z|eoE9GK5*4cKxi$BwRWfC+RBwvMQJ zI^}Lc@80$uj;~iJHv_JvoVAT97_j_Qwyr#h>@7>xe?oZN*Jjp03~`K!uq9u4%V-#z37-t_I;Rl>pzv^XE%f@6j^xNROYb80*@SR+LL%^pVnaDN|ErU5OWK_hLa5<1Q~~oQ`$79DsTbzS!5mR zfuY-O7wg?|X$)WEhh2ZyhuX##=cRARs}%N}v)f&4yGOJp?fu$6 zC%7)(eq-U@lbe{tQ7V?Cj3TI4FAAxNh(#Z+oorJ!ytjx5qGJ|TSz0&l#XwtRbUc-~ zYL?rdI3bGmoylYKbTUh9foJlhuUl=zcQoCk!w@^41dX)-7ydy;OaD5MJsVB}%42^v z1pK0cRNSj@pVo6u@Cn6kSsZ%m-dzxZHQ*;y{-`oJJVt?I0ubiE8Jwl{A{s#%Iq2YJ z0RuhS#=8krjiG({ahc2A*O(K|@_MOP(6*QAsXcg>rbhrTdUfrWh}h?>KFE(tG;Fm^ zdyVFcNJ?}cns1hFfp$=Ed688(B4vM1#VTTv5az~94gjrm)XR*umNl5u@74ymdmTfw zd&?p|>hRWdPpGf}Eb$473n<>96qrQPkPwuA|FZ3KdN_1jzK;fAJNn1iv_gEb^Q=6y zNB>wmIK6MibEq~yM7!(0o0|ZEN%8Kd8p+4>0!EOx?_L-(~y$= zhyS#7gR|7{1;VB>orxT8M*Cnb-Yix%Jc3Z`<0$n+od9-HSY1Ga0fOFn 
zf4~pc&FqMd30}62X%v2EcHIG+C}yO$wy{7_Hds%(WvseSgM3bKO7b4KIIsdt;J{{dd>mJyes+5;4qKF|Xc z0W+7t@&hY>ts2{M+s5@>U%|IfsR-{KKvH)`PVI~{w$mo;OD^MqNKm3h5)1&^R=&RH z?AZmd;F5HwdC*|7yFB+h#3uI>Hu?UFx#x@d)w5TUPdHCV%6#T>n1*;n^#b53j2<$pi;r_TKlOU*M&GtC#buKdx|>O#pbt63MiMsFay3 zwpYLY&L&GZ{9yt|c{cfEuWlzY(Okf4Gx_=I-$Y%Aj%PB-cp6gowrK11=VQczn?&dG zFF`B>OSogAL)?^`a?yT6atXyKjpfS;j_`YEleJv{letc$j)X{)V* z+uNRhLpD-jC~Ba9f$}tPE`iE39K&^yT1^wrW4Iwup+SsFC5(x4K$V26Om>3wz&Jd4 z6zG6bJn#UJHl_8XE+?xC#F4oo9ImqS#EB!qjsa>V5(Ak30ui2il6mn-p5#UtYt{nT z=3rgGK4-=`Q~l!o>)i@0K?{wp_1+szmczz>vuAYh-QV99&Er4ldo*hTtV*+`9TxRc|WwWT)`%ZXEcpoWsIl6LWQ`^oUr^RkL zVUt`GDKpD51BT3l1bxfpM#t@#X7D)ruHrQnqFNH^FA~dL(Y;k?WC~Y5!Sc2 z*BPJA-`u{S_ww_i+@mD2oNNgoZ$x7%d1?}qb!5W*m>x`$tGpwiAtcpq144Zw zIZ0b%$dR0^!j`BNV^txtG!#F79w0+_V2Q?!ebKgMy`vXUPL%8{;k`_5V^l+O)!mJR zpuGmSo^F9`AZ)R23Gv5k4a(mfh@f-|(Y{$P9Wjoo^oyvN<*s@fy(@yG`}j5ESqjFcLe|logyxQO2$`)V#2N1FzTB1d zdI3>M7>@vMqZMVH6~P8rQA`X5ik?jGDe7$N!nk-phR#>5=$Q@!G=Tz!9|3E4166GBQ+U~gk97Z z5NIqqK@d34NASxFS0=Ru(Zhl-jz89W@pRbpFN-39x8Lq?BEMo$D-}aCda}bGZQN zV+la-Ku+TGD#w!AsnR7@@^uLBryblt*Cf>iVm0rWekd9O=>nRVHpSLqY1V(bJ)8CI zt)UaTvnFcY8oF$qUvRMjA(-kgJqNUeF^2X|#8?KsPL_Q+KCF#BE^;0t5;hLNKB#7j z0#c?UaSskZt_xa!IMQ8F1Jt06PeCqFXIS*rm10QEkq#BZaeo3#IhY>5Y z1rdU64tGuIII%Ir(uzVQbIWuhM``+mpyiN|FUqjesOigMfxJH8o9q~WikdLwaMLk@ zVNx#%6C-I7-b_}g*}<% zlsKhuXkg=*HaK_48$54co z8MZfne3ZlIUy!3vnsBOYa2ovf)iqWeSeD3qdT7dewk*-|t=e$U4;pYCRN4);COYj# z40xfa?+;tYaoj_p_v$R^F>XnW1uZwi^=S&%w(*dz*FU1lnT;_}C1xS3V|{97diEu> z9CwHHrk!D}vZIYLzcF8fu9hoxmL&)}QifxH1n6rDdN|j1_FLApYv(t3DQX3|gf2+- zgOS##_t8O;f;7`byJk^r3J?FG@GTXzU)q>ZRCT#3Ydh|sM$b-r;xz#jGlTNv@uDfV z4+M6(F79bCh;Mcj<&fA7if|b2{RlwP#6sDL#i=R8AWc3#XE9sgx&p}O=1-W-H+GQ^LQR8YbofsSr!IhE0&q!ro-LU zBu6EqNhHsEK4)=8p|sBe4^iBy0kO<~IBhs=K=$Sa<1wSY4X-p6U8#0pGbz*4cU4<@ zFhx8Vap-FM(&AL*#pR&Z0SS@#ct0#6H&ODm(KOzXkXOOH)DW*iir|R zc_GdR_V@tMMnmD|f6|KA&k(3;j3#t&9+M9~WWRs+tjmrD3|Izx zd{_?`cbk&-+|M@^s^XKa?TwQIoZ_Hg_(4Cs)7J-IV%P50zJ}o4xR+IY*e_WMnW1wX z2!@*zQAp~cp~QQPLGuHU4pB(ZX|c5TK-a86`#iS)JDk2Ku(al(sv%N;*?8tZQQ|_%U zPed@#T$*1LXL>ZfMBwYz*GV}4bM0wH15A6qqUVW%_Z%MT<3!h3;q^ zK~oQtj%qPG{KKL>o;{(;8`N>&dI7ARsEQ!7!7=CV_fNB)f@-zHLBZL6vNx%lx{C6RCua@-l`fpI4VYZnT^A*%p>6 zrk`zFL$7VDYB_Ig{~mwMP7V&^glhv)wzFK!=3kg%>6qK2E+)pCkeg@`7z{IV+Zxqm_<7kv_j;bMa}Gkvr+?e)ne zrxtA2XoKz3Lu@y%7F%9(3)F7?Pw{kg>J^fEcaZMilk zpf*7r@9ovBfH{q<(&UA-gi^zOWh1$>Y31d zy~!+Tc;VT@+#0V{lZgR#Qd56tUfg3KDV~fQxe|OmbP*YM5QIpgf zvy~bE)y8(f`Fa7Ik~KK9QW{zx4);7~w|=gm&# zG3iobm!aAN6#+JvfkFWkmsHRLGy;Xbmx|B>8-I~1N%`#l^_|&SKD8&?MLyi^lQXmP z+TX0w)#EB%{cx3r_U``b#rKl0xMETnvATa)ah@`()yinagvwU;+tsg$W?Zg+zyI;Y z_r^^(E5)o7K+5m>p}E6NSKr-V{pX6qY`Ox{nPbjzPfsbc+8nNa{XJc6Ve!XR%A~Wa zCx5^7umXl$L2tkMoAElM^k%C~pe-WDw@sOPtO{_3WfDM1Rr( zNf5H9D&cc)4Zi~=$CgxvYhGnIwE-t6JJPWg2^)QFvh$TFzQq15{NwmHWJ>R%#1c37d67cy5%Wcq;Gf}s^o+09H_m0FBxQ9xbua2u2Mx`9 zm~4OOJ5AA^_FWs}zkxfa8-LF`<%Wm6_T6+~SRe5&$oJ!p1*<3eA~{u*(7luo)c*93 zw{O2dhP$%Ln@>L~6e!0GZ%zu{->C1og1$zB! 
z(QcaZI7qyO@j|cL#g;R(jy{p()ppAJCg#BY_9X*LfQcDQJ zKfpq-W1x(ylx2E=!w4vp7MRJEmLnv@If5NH?N5eN9$!4I5e?DG84AK(hoaln9?EQ6 zM(5?)GRb)WKHe(J~ig!n77$UB0Ljbqx$UbW~VSi)3k}@*_ z_%fV5CyUA~Wdi+hs;1ru5+ZIi0sw5CX35X?sZxVAs+C~IbH54EZUN+Fb{_3`2|+g2h#&oV9ab-=26KRP z2CAjU(*a5_)JL@T#LQ*n0-{C^DGhUcC|>YV;)=ZBj3l6cH?!y^<$q-o67IksoZ2Xp z++wAQ;&;2I2&dZC2XJL@z?Q~PIa1R1AtPhe9~8d}Ez1njB09;b?PxR<&A5B`tc5Hc z^%oA@CiVBZqr9uDy4*(HL<^`F1~=&i1EU|>kv*jWA(&kj-z^oaE~Ig)h^5ngBF2@t zL4;y}*=907E2S|^$baE{c}sN5t|eZhZ>gDYq2Z)U-c+>o-shXT--)?1yen`As8@=J zxda$>axxs&p>`i)Btz?EWIY^>PaZJX)CXGa@~))(Lkk`!_)gnWCft;?1!&_#1tf`| zL5Ak3Y{MtLDHa*7hp|4d_?HOGG^c7?;7wft+~o8~0FJ^mn15mRH9Zo!lt2`94?{tI zmKm{#9V~GhGnQk9XXkH0Kz5=SNZ?Ig1x!)X;|)xrz<}!f@n!O+YiSzo30GhfQdk<^ zTaXGpOo50cEPk93Szf_BPIDqO^y(9l%A8IDSQg@OS||hS(-6B>f`xLrQi;h zd{W%c5`YVDe19O!!3Tj)Ih<@039d3Yettae5hoSfa9HKMVds8~$AmEOFKyTf(Lvh8 z2~bDmY*s)Do0xjQH0AZn=Rl1D#42XDA&@^d0r1hCD8p4ERq9_fF@ZKw<#I?kW&yIi z0MD_PcyCP7wt`%G?;ipEF+;npt02cy2`J>$^*LLzRDbGN&eqx^1&?C9N9g`$gNbq* z+_Qg^^UELB8j{ie6h5b~%t&vrvlb$GGg*xxIJnEzn54^Sie!Am`aZ=0lK`1wG^J&! zb#d#0MJ^av@&QZkd@2S;RUPgK4O!hR?ud!MA^f(MqBaJr4U=Ywl8dGm94_=3Vif?6 zs~R(yO@9geXQQuwqG8?=dffKSXU5x_bJ?7GPD_bc+=XO{tKBX~D8elT z4ex;~&%$jJJq76TE8|r$J$fywHRk>J49348#eYKZj|%DK_8x&9%$22oOUFS!2|1C1 z-vU51*<5Y(;!cW*#tD=n_(nmpocQ@Bjt(U3v7j5{v5(Cw##(5{g%S z;IiY7VP78L20-2beF;qwk;^U?`b~NjgVW@V&iS^8 zhYHYo5WXqsiC}mlgi2P!T?DlXT*W5*oWyxL+w|T2)&Bt9gT;voWo~41baG{3Z3<;> zWN(+S+5-gvIF|t%0~7-_GBKCIcL6DXSy^-3HWGf|}SX z%}bI}(Im%WWrm#akS+iEbfdvj7)iC22MJfR(cS3d1BP!tZTRNR1^4Z%tBdEamDmWu z5@Y1%Y7bq`Vxu?KXeM>Exym=cZVeNP$;;obetiDgCUbHfu~-UV=IGuuuaM|};)kn? zKQC~WZ-99evqUD2qtyOm^jT@dl zqdt9k-PPCLl}3sl{t5g;?*j35Zt#Ju}DvlLl+q`Vge#( zC#|c6qkoeLyklQ`k8eTtj0tnOfKlo6g~m} zsXx_yJCah`Ux>UD5#$uBphLCESBsqy6mCLqTjzXY7D^%MfsfJgAbeRtsnKHl{^eyX zw^#38zIsLz#h0vqICa?P;81Btvqa9pTE1GdpfSoZ4nA=@M z%z$kdn_nTVhL5L+al_gd;wGh&MD{JUDK$Z5-8v{souFcWxH2e){m*50L&jOi=OfCg z9Pw?L7Zoz-O2P>=dwXg0Ha+wOgX6c+_P<~xm+x9!2ZJ>Q5%yJH{Kkc@oUM@Go?5aM zkdC&7$(+P4S{hb$M>IDnsyUtzecn!)U}o1-Gg`1Kix%Y5;HOtNXhdn{cHdXt+BnKf z5Ka}%u-Mdp$51GUL@)(DJz?1E_i5LZUxvsX^L8k9tztYLV;3Xk^G%tdGT#0Q!5@IY zg(6do8;66iIl4`$@8Ww}n!=w_63(GtI#L%*ZpODw@n>H)EADWp&_6i;FjKyF6nS2b zo)ob9!b@COs>Ce3#6k$a-P1gseKHq1Xm!BGk&%Oc;rgI)0scfI$xYf4&=5yH=hzume8n8bcXuEp9+J9yMnsB*lPC*&OLvI*U=mG^fVD8Cl zze*;5h{2_mTxN{6tUeul0vu2lX2%{ErUV%}XknS1E+kW#Uz-CkX)MNj#3EHPe>N%s zJRHomoKIpU$?DqUmR+KScC0NpK$BwYoi^Ei?8`&9bFqcfb2j8;@s?^Zch?vA0xCtb zo8`fK-(h?}l)bG2`FN7+6c0GESU&j!4ZA)!R%|EGGZ2Kad>k`x_}rM^;ZXk0g9>{8^dp&t*h0 z%ZNr>U)_;^mR1BBppSaRgCtaLiwrgY!wHcOR6h5Em~R@?c|e&s;~!PW7nr+H$UQyzg%A6j=I9EOY(Er( zwO~>*g&0G5_wZY^t3CF$10eN(rMpu=sSikwa*)tN6u^DGNb~deBfyHG3<1};XskMP zCZV*gm{H@CW!au#W%ZbAe)`|LcTXvEJE;&u7I=S9ij06$(+khNN+Ve$?1LyTW{H{! zOMA?-DTG`{8ecqE+>29n*nkQus2wX>_GpSlmCbmOzE_VU7EiK&XN$LipsH}! 
ziC7e?Wy*5FYNYom=(vpOs*e#qv0|3} za0~4H@SGdPOie!uWamtOs-pR|*X0$1$>!A4yCe-;FG9%Le15a?a)t!5_&dsI?Jo~m zTkDy%=>i@lObB%!*aW?aDfxH?((@m^RyUQMj?DI_;!B5zpnVuC%^&b#p(5yyk@j^F zX~QG17#_X5b1!j;WlAr+9Cpp>Bb80=8^d*lLvp~MQ6isPUtcB~t}JwukEA>8LxDy9MNn;o zw$s(c;@SHK3<`>_vx9k>i1|+J5TOCZ$YJ5R}&A zwL~T{GkRH4W>2*dor8zJ00Qx`(m&mPcBOprc)P+X_(BU1)Qno}#o( z0(jAG(4dPViQkf7p-QwZz?NJcN%!A($e}LVyD5r1*t|yH%zSf@G}r|+c)Or}yxS~p z?ga}NBZ*Qx*lb}+NvvcLDM`50!6pwrg^Dmi#PZAL-OYWJOvR-pF=qhgH}e+g6y5T#;>P!Gmn zWLH5amfC|Jz}lvy3ThK15s@(oa?qV@lD%uG%!+p~d-_zvmjV zs%+ZP!@HtP>!&|Y6K4QeM0)hMI<-YrrhkVEf47x)D|_vB(f{^2-_A5YRAp73Q>Ppj}I8TO^kQU)Z~6s^f?#HC(#uYw!u7db|uOL`)GMa~HzH zBNo@=6~VlkM$kFjm^IBc5S;7vvtMeBUj+ znRmdAmii?NGw(N(jA&?OGrx;+his)dd|mQ5G?_~^yLMRP@kp>1VN;a0!#T0NpWwY* zHHEdb!M$ob6QZHFxGs#BaNhS?=ul7Y<9aYmd&Bygnz*q~?J>$AKV^lZ`6Nu4fbVeC zvxlZQj%?>6wtevMgV~yT!r>qYld)OgzCZ6H|1`0g#RomL$T_)7l2h-Ni4z@n8j;KLwnrBn}?j z(Om!xg@(~K_3 z_O2RY0={CBXirkyXb7NeMw_6_g7pi+ky{`gb6#WZJmMv7jh4(B^S`*lQ(y8R5r6|) zX`KT}P+NclDbB1Gt=oRN-LH$wnUTiL$U~fSM9T2+`kMCM{pl{P?*B}axAaMu%j1rQ zzjDLhUD0^{1Iz?^5{NPbBP5`It7C_!+`8l}ki|yWa72QPQBys^hkUB!8!Kaj5fRpKcpw)%}Q7qw#Tntg@At za!ub)|M>Xv6*Ao9`?P%e>!Ej}KvdbD-a|2Zm)^?p&$n)sjIcO$`)9O&p94=D%Z}$d zFPpRO+TUEx1W+H``1}=2{*qP8GC$Ofq$`+@)LV^?JiFb_G-0}5^?A<*e$+XcphJHg zpxO0%1BmJlI%JHPfIyo`%LMO*Ll*!JO-0oIO)Qy-qA{8h{8lg!F~X@7Q4dbXd<6%g z6Wb7(Pw@enOq*M-i15FEj(D+1dz;^10?&+Q^(OP2%QD6FC%-#|W89)J zrSa}kgeUI{RAaF?A?2iy0I$-j<`)G`y8HC)k>)7(hB2-SW_&5C}Tv;rBeHfpY^Kbn!%ypra(*q}AwDmG1ng zc0NON{Zd*@8X|^TC{nL?Tt?Iu4bNY>&}4rUpw12?TwZmBOV86}BAAk9cn9yiV1uKO zMP2daPHd)#X-IhsNps?EiUyb}GI48{W9ZXwsaH_zDUCDarcc7(0AT<#iGofOz zksgBf%ORqe(5;rvC#5x=PvV4p5=ZD_?mL#0ma&evm}tOPPnPmFV%DB7KZW((l@r;ras=mv;mN6ap|b zlTf%Tm+kTc3V%nMSXhDp0Wys{v)wg@4oVuZ#bz{s}CWoU<%Z_sN*s z5s@oLpu0O5H>{fa*3-2-&e2W=+J84~(Uga-Zcga0;luAc-@j2Oh|w54D|iPHv%<2haJqKmi zxTxnJ7BIbY8%Pvw6(e(lrm@K8H^;IwH;24w&9|?b{IFeGV=lSmEXoobFex3Fp1;1l zSu918&DXW%WXZL_bqpAEW*v^yrGyIcb`wG||>#>1Fq@9qF0=>|vB&8|xwlixmq~QF%A)T?{ zP?1+erl_m7YmP-1k~Pi}mc+epS0U*#mh!9@uCkUkO$C;?Bew~WF|E?6+OU!MMH93E zfPV$|#NLM4#kv;=qlZ&a#2L7z+#4EM&*I?NT5sd9s_B1k_H}dT_I2*x`c1}azS>#@ z-vaonTm$6XFlb$_wX+fVdmbF7N4tSg$86%pv6!K^e~gRI#J zO088Osm#rne1F&(n)tDaXHTa6NYMrlp+sod=VlM;a5Qa-{RBrptaackXt zq8ULG2H+GT`+=}ctLwUQBcDy|UY8=qvb(H{2(SVt5zw|YVPvhAP?`w%H_ezz27mYV z1#5jg1W_3=!G+)N&5Wr4V(lM`B}a3lq^S99e=*yMM4c0W$1#(M_<9i_n&AXjQ^yS+ zkF_AvLj26@YIoM1Oo|iB<%Bv9 zW5Qn|9Xx@}x~Bhq4h6LGSj2_8aeHm8X=(Mg?6x!lFvJ$NIz7Bw>;4Yc6Ioq<`KR*C#v&QXqOCq#Otk7M{6>sJWFlBp;i zr-M|Y$w4-6bqw~h^+AuA-W%7VtSKk7HGS>2xod%-0U2jO1uamdPJgms!lzkK^S(vi zej6kcGc~kH*C8sfMJK&FWk_Sqvc3;|c7_c!F%RZY=&>a4*^D}0w1RCaaj-PTe;@#`Z zs}~`f5^9I)TT4^5$A88uZ9Dor*!?OA=|?{zX*>jC>F5Oy1>KKn!&(DC}kjAC~bia34nS^ z`Jjss?@5mk+bs}Ps&+8g_xWW?9h&y^#D*uN6_eS-{YY+;?|)i@s1+P;OnBvP9iyUd z0Ces?X}fzAqaM_!sEQR_-%svKB&>jQU^1~BQnnE3IAtSWl{vojb+}&>g&zAjIhXcg z4dbCh2xk(>_*8r0ucadJ0Dxcf&R2E_N1aTIuDd*IeJytQIuk|uPYfmOQCxJ9~;`Hs^y@wBh(=!-Q1%diddBNcF7yZc?}(xErv*a=$A&4xz0Vr zR=^G6k?@>kf-#;|W=eK_FE zRL)>HA(iZ_;?^DW4>$I8E%Z+3d0B|r(KGsM59V(4a94fBdtdOO6A6!=4xhh$>tFjS z|NjW-4Mwx4S5}>p@7+sfIgvQ>G{)b)b3CTszWK)T3_RHWY!~B*WidaiAz=Q6ZLdZz zrXxLZ`hTIWcY|IO8^qiHpK84fG5zHspIv?TD{}qY#0vfm)mCO3Tz0ENND1{B5)TIn z=Wl8+oa?{Y|IaJ4`rX8@FK_-en%-Ppzx(qffBWO=-Dv81vp@Dk?j1M+)YmsD1P=i)mjN3C6_*S91A75-w}<-! 
zlmPxk19=7k$-LjNC6`H#j8V907n-nxoV~mNb&o?eAfI!BFc4BQB zpV58{{m@L+uXjHE#9$?v4*Jhu2pyXUQQpC@fKiie-P;R~#)A`zQ?RiM!v*Za%p3 z>As+*bco98?7o9&!!cSgKmq~e3;^na*iB%`k-B;@OcazYw~c*hZ0m9X`q+x;x%;1U z(?0KZ7Lc{@%AHA}O!BJS5aW6fdwEA#xb#LUAXD;|3UoF&(-o_r-da+=8558%OpVZb z;C}h0pn`unGDZ)qeVjqzkq9&G6s}*{0p(BPev5*{PQY)AZl7<65v@4o_JT++cS6;j`3PLravyYts5U} zrNt07Uz)4muYUgd+Q)kt$C&^xKK5YVLyy3Nn6y%08lPLcbL(Rr6?x0ih(Bs>PxYZ8 ztAo;?-7GSCg864xHc)3QI30c4*v9?&(>C?6r?2G5w^&?Kil+JkcB|^jEyE=0(skGD zEhS8>i!~_hh_{;m1Goc!BxKqwi4s@OrV4Txpk@}jC$WnQEb4S{_~G$y-22E4`RIcC z`<76Y=DDH)`%V?q=Tw2c$Nnd`6e8F`SQ+j?q_r1HrFjVW2e4L2yV|JW)(Zhu#h#32}<4`)Dt;qUly^UE8rB399tlX?C1 zh-*vy*%DJ*+ACBYo)|1ZgB!H;x0gTu$g?2gbV=?&3LNOIH^>Umlt>S+gGf#9g!Bl~ix@ zA&8RV>^(a@#Z|nF;&|k$(-UORlYilweH_4Fv}C$I)kXW6M;b1?m2JV<9K2zB8#fOukTQFT6NoI{KUXzaybpvuRRtA#U$XqJ695!)GkRF zR3nJmu}IALIdhKKs||QT2SLI{=Se1%w@qPZvtHeP^?%HzewGSlZe(+Ga+j#u0}KK) zF_RIp6qol81nYkY#@5((e6Wa^GZr@VfbY2$wAo)vaCi8HoIQNSQF1i!9PjW)M7{OV zVB~uC;qkHf@9bhO9+F7NK!`^YJU@xYzirRXXpA4lYmUM+A#yXN@go+`2SO0E%tFL; z;L-hpg$o8OO=HGJ0f^fw{%6#T9YAM=L!*oGj(8SPIN@zclWQ78(!=h*%6VAQ?kjU>6Nf6AOf1+TZX+Y8{adqG&6&hdM{J@l! zRxV|6{j+~Fk#*U{)zj~L#DL4K1L*onp}uctk-jy5%>vs9@llAY+u5vgJ{~H*6=!)& z<2rvb!6*oy)Oy>)b-t%-f@ro%wa)9XewpATmcVAE87XQYcKy9yFY;{Fs0vSegk8>sh9lYX{c0$w zZYNit_pn@dbHg^D2wnQVQJ*EH*5~R9n#+I5{7>a|etvn=UDz?s5lI_|&N`q!_XuJa zi0j+Mjq>_^dNZ4xPxTtHm5CZMa`lreiV3@*v`c|!RhC>9hKSO}`M`@y>yy$&y}+uh zoZZP98I46rhLiF;hVav`-^RsNC)Em8HXAxcDUWLCoW9HTs3$_ayjUgm z8;$2E#!DGkuv#@D%&>M_s&Xk6B9g3SA!mf}S!rOSg4U+~UA`}~$~@qgfYwOP+qCMV zYtrDqWB3>-HTT=gOX0rjD~SUY-%)?cz^g2^!Pb*?vm@sNQ@#Id@?U*;X`u5e^TUxc zA_z(|g0#l=F;!r8z zI>ixkdwxEh&&^423`uBx;~!tZ5fIy);tPM|S9JdjAChg(h^P zm!aAN6ap|eld(Z34mUCiFHB`_XLM*FF*r4sA%p@cf6ZD;ZyPrdzUx=;*j^|OXE-E* zAV3~)Q=kQcCWp3$500Z2MguGGqe%aKzu`uy)>_5d)h5j$*6i+(oX0m0lA##X1xGQE z6SZGsEQC=M&zNBjY6qwtPX z=!A$|jgO&|v`HX!lww*zwV1dGTR{~YApyEag%%JjYPJ!=F6LyiM~o=d_#iBYCOZem z)GQ#$F$dEY=45Uouq1OeZ6OdRGYD39temk1f5c@|S8O`dR+vFFiwR!UX0Zf@z>Q)j z)O1YQ21N|WHpr$;V%{=!G&o7r7Jvtp%>WACwv=F%wiLvbZYg*}x~1Uu)Gej#QnwUX zQdvA!5N|iqL@(&=FkSUTtSsKRGpU4C`?xkWfA_)0m=#fWuo@6 zf=Xtjf_Jg404+1eMgiy@UaGj9u(Uwc+$mTIR8t1n%DFZQE0h*K71LES6NlhRNM!*k zx;$k)@#7pP@JdD?cMk}wgrNBNkl=^S7&gC!%_lfJlYV?4L)xbqe+*A$ z!v;8%ZU#uSe0jaTc(uB9r*8T3`H5S;UVXTApM-t=&-)cVod3C6ET2K})z$5dWt42= z7t3E(H|x9Wi`7k+jM}@OS8p%RpRPZ+Qxi;K)?DKm7COJiXe728JH{thS8L36+GR(R zr)9^Eps*omL&Oci8=^DUJz?9le~aa-yEnJ(`KQaPzZT1<>+83x>n0MNEnh5uTs}Ls znZucBaRL1)Rc~2=@v9cCntcEiIY+O7!S|%O?Uh^puztOEaQk<+=WpJveEi-lIgb!R z-eQdj$g$)v)stnk2;nq+9U+i)yag39jc0fy8jJxZM7OW8d>tVeLR5fueFVPGr%8;_4M;q?RZtmXPVDRPo$|q0Zm>LLjy9Va!(GM!XZ96<6@>~tJ z!G}ED3tU{@uKe@!zb|iI+!4)Lj>%`hIaxn~^u$tpLPbou_#ne}dGW`3eT(Jp^(GCd z!edfij~ks(F6d6se;_FKQKMnKmCNpBIqd3aQ@OWl`eg8@f$}PKI{2l`33Wpk>HtTe z3VTVe3J=@ShdKBmphwR;K24*le>$}K@8wQO5EnFO(d+eVbF>O+pjAjR9Ri8xgaDGK z*Jwvr=&o~Yr6Som6Jt}TItc`$dYLq5FVlD4wp;rS-#)dNf1}3fK$xc^!aU6y=2C*U zrZXaQXQ(PQV`GJ9WbVvUT&$zwTt~(E5O)f>{eH`Ft^;wdN5r|#8s~ydv>jDioZBg; zWK?lfB0*OpaZ<9^QE{%{P@MPn-jf~PZRGDcyd3E8yt@a~dSFoPIdPLOxGKZ}F`ZV~l% zMgA=#p$;79Rho{3I_o(ON4q}}yR-F)?YttQ^TBh;_^ngYK^=A8SmXcN@l1E?K!DdY zy|1Z*w@N@_JF!~X>zF_&S<0TTlOY672 zJ2|E@*^)}>SX&9iMAS?tX40Q%1RscpW#ZalGLf+~dD!RNclWhxV2Kq`HNe;8(b#eS zHz9Rf@>sz`xGOcf2Oe*MR%KD?_A(kg-(q!Os(^7D4*ayq6o{XjKr&o^4tteGC(q2O zAVGGwi59WSqqSw?mUu-K!;)|K;|GYb6c`)0U$tA@Wi~c>`e)23nFNxecf4Ow z{!tvjyl}?oS@0*jV&Wfq-FR|6H-8gG6m4LKyaedjOXh7r#Xu-;&}T$ckOA<#x@I-x zS-ho$AgjtW^hazYSA@2ICeHNhGE303UCE65qRe3DS!^5}J3g!M{n{0SZ+1Z3y1EW06d$>!GVliY(Z6=<2U~NJA9tbWG)2XE=MxanQdEtSo4LyxilO zr@strRSC%;O@dW_jThIAC$Ihet(JN8D=f?G@1U|&Cea;woa31J3}A?h*Y=+2~vxT}&i01=HngI0!qXc+&!Iq(j$(w6}0IM?XER$QEIP 
zl)&3+npXO$>UhE%tmiJ=IpIJO0vgpg@LcPF7X9X)jMnwT&6YJlm1wSnlYvewDG~cc zS&~JeiK!ERr`R8oD{)IR5UJ>LRz+`-C0y0IZ#=iu=;{RPPcZWd9u_=j_fT90w2#n8 zTJ_xYvZg+BfvldS{cJkYDgCPVo$m`%c`3LzHEWQh*fKEus z2R95Wu0wEguE*OZmoqom>4P3QesJia>#-sHvU;q4&b6l|za6-JVYOE^6O(dt$=asI z^0I~g0e$7g`D6o<2a?388n%?q9-+cQZv&l3P82ZHwSFK3mNpr>FV4;0y^&C5ngFDl zl6H0Fu1n;MD7~piK9C^9f;sMhRG6{KZN3-$qHS>i!^S699UGB9PgyVgYwP}j73sw1 zA6GQ;-k!T4QUKC!}(;h;L&=q5EL6!hx5`YSUSC3K~{{(Jz~_h*-|z(uI%& zcIha}35oP^j;2S(IGtr7on-`aVUm*odHOJax`HAF9xxt(fXoIigDkxQb2kl=#YvjI zZV!+_%e4#)OaahEb=MvVPrz8gpXJR36pY1r+^=;EJN&XaAL1TCH$Gp&23|M%IzEG^ z8@N^e0;S#X&3=EOD?WW)6)B*pG<`kxMJ&wZnuqREm;QuF1o#9I6kQ}6h&nfMuKpB% zQK$0@3lke|SMUMD<$iGgxy-JiJcft^B#^PrlXcdo>p{UL0DQ-)sH^`Lj5To#yy*V>!T+)og(-oObA%^JSzA9(j9x}+Zdks zoiG!}xe}nnYn~3%I{F4sj9bN)09M9-LQSs!uaCcM8bYP;w3*oNU3nphj*%y1P-?=F ziAMpqTgnnyEa%t2e0OghY4!45Pp>K1_?_1rJQQ0V;UGGUT~Eq1S1ko@Yt!Ua&$htc zB<1LJoTQN9vg{e=0gEvw?fjt48K|ionCLIgk38`A^(zCdz?ge1 zAjkc!q~Z$BpWNVAuHd}!&ljX=`PrMT@tsllcA%V3`%%6${S4TDum}GAB;y&RnBU)6cL{|X=&U?;1BWu?MgQG$j09X# z&qm*_%PAdOF{Md-&j5&g*P*4gRFu{RZu~|gOr`5^jU9XdoANEF*6&UXCI~(a#a3*tgg}&!xuCh zqy_hQaA$h6s_<4-UX*6OL$sQ2s~0*D|A#5E*ydA`p$J-@C@FE&u9>|{d+4R*%+`YK zua%ci<~mMA`Z5N$@C;t!6R$)686xbksnIMJGw?yGzB+>|QiVcC@2eJK_-UU>GN!(2 zK?xTGL}A$IL;vpczsaY6Keu0tde}x03CR(d93e<&wP39RkOg;r7D@zQFk{vlICWgS zC1lXdl`mzN_Y3v8-gUjW@#9pE(3xQ^yN_=_T`y!js|R6%w<7wPawI<4sm_pp7!Tp;25dZzq=X7X zY+8?MN*;q0pXm5>QC zj2L;L_KvHXV(hEO+eKRB;rZQ-buF%o;RF|$ai|b$zr|GY?@O|^%N&rK-RW@7- z8lVYWg1PXgy?KLR!362KLkD4_DIU3je_$2&Rb7owJb;qmmt@BViG1&$z9bVSFgs_t z=J6|2;(rN&`ZJtx<2jgekRSN@myaKBCO$s78hl)}r5}2GGjryfoV>Bp&i%iwl=330 z0#dUY@&~>WffLP)N|~(|{do7o{{YXDR3Mk3+5-~;GcuFWHxmRoHZUNQ(FG_Qy;oar z(=Zf%@2@Z~Xp`E^&Xov(w25sRs= zqw5<8BL4CtfQIG20r#7&DgSdU%kVrGB;acF=^sW-#{#bF8W39BjM^68%C|txDB>gP zHL7@8zNie8zFD3JC;^LqI0ub4GGf#8wmWBb9P1i%Y}0TZ=OrK<8joh9gA*44-|$_2 zYQt;+5Wf^z9&jDYK#)I*!1dn2!T!WCoxLzm*S7@`i((}w5m{0q=lMd61cxgz%kqfq z-^{qjV8W61c|aSRr}QYGB)Sdd$sf>x!V(*cC+A6j$YP zzlxKla%u)ulO|DZn8Fnae;TbA-6qDfm+~kCBmrfY7bygv5{H$935f24l%#V`B^3~@ zADzyZ$SDLh0X~=hDFieO?8EW->}>WuIJ&r)mp&>4J^|;KeJTVW0VtQ9Dg+_{6PLg$ z1QZMMTZfNlqrY~y(%+Y%+5-~;GcuP!4*?XH2`dDA0b;j?D+Edk4P>%d^ycP=KL9E` zBDt5L+5-~;Gc%WgrUMk01~vqF0dkjsHUv@u^0(7A1Rw%`S}t;t>qwKWG7LqON(@<% zhv_pj8gsu~CGhHCxp;kb!5bua`-@h7+jQ-{8^1Sg<*+>P_Nyt<$OB_`bqA)l>9tz- z4EVsyH@t)+k7L}|UQR!N)$Wwyi!Vs>jL1I;7=ywzMj{PhoisAuIe7SI>KfmFo#QDX_+f({CP z-P+|;wHUtfb#2@h)Mg}8N9Rv7u(nkNkNxTfxje4?mT_)hbZ)+j%@cZ;!jmAKF+fQX zWm6+^+%d+`fU9t}V^r{%Mq!8_pddWhqf)px^yK<33IVp^6I%1?45U@*k$7d#;fh~A zSnqd#&>4c|UaIxSr~yDgZ|m`^oomUCe46`XajwU9WbWaVk%%)rdc@8-O$(V!&)&ss z*Ck{&HEeh1c*p~fm6GwfXEv51O{bCM;U6`^NcS9Vv#ExX8<}JK!EM-+`%TMO?D-q( zb_^}5iotv8GYsek7Y&>!I%EoCTKJ?*;0CLIpym|b_AK#lyV5B>${l_IZp~;@7u4E5^6oSd0B~>7rT7IdU}=SyPB|jg*u+17 z?M^u_sa=wO%7}xz!TB%O7hO-|fC{!9wb>L+yqbF7d$n-ubKH}Ze5xo*H;obXIJu0i ze(#Aq1*Kd}(KYAjqPwwuS5SwhGnfrvm;lui9YO6b_PaMfU;WCc9mubE*viKtD}(9D z^`8EEhcV{~PCG&XFe2lCLBQ4&Ko3xVICKMFm6_9z1}10|-(4*;8C?H-^@fMJxmKRT z7sl(xJYYn`tOoG*-8W+)xZP|qiDDsh{Yf2in(~#SY|G}68g1uQ!>p^Yt-=jODWC^8 zPGfNnwMakBg<^IrjkM6Yn#R%>Cj|{d%;B#OXKD6|Qbnl~;lDC4k=YKf7ol{2y$I$) zfS{8?fT+Iph^pd?-cYt9iXs(pSfntGMxJD3E``dICpHc|7)VVu0uw%#*%SsH_}UU*$AQ#k)w zFy@xy@Q@b(8}`0r`_sM+8Jg06v-MN=mO^OM#hb0Lss4~}u3$FH$W;kRvg)gj+IbPAY(&@6a{ z*xAwXv4y}5-W#rm7UGP?SPb0w%F#vgY-#8Jy3ryH^G`|XD?LPv^8 zWF$|BzQ2C;H{ls{CYPbw0}}!>GnZk>0Th?0Jp?U(ZFAx_5dNND;eP0iIY6-_zu>et z9k?c$Fyzu{xEVTW#^Znh5x?a8(vtT2-z!-%wn4mH@1`GMWofnhX!qH*Y-kJ3(5}yn zJ#sKUJHN7_<^tD7wlC0g;Lmm7*mSX_VQfmkEtjh<8H#ZdeBYh5j?~($_E~Swg35 z28#8cgg`=E2m!10w?af@wjk1STPhs@bko2P`auv+D?Dk#bV~ms@w{5ygo=ZxfGGV8 
z|0h3_ukcf55n4_$am*L3?JeUd9e?;Z8i~<==g&e8k10%c6$<|fy8T^DA_KSxHX)s# zi`{W9IaZ!8Bw?Jqo_LDywlNe>^MsI^YPE22%w9*WNJzvkoHhV?*qJE8H_ ziul%N!C!k%ISpo29Q*Q4Ro<x*w3Jg?f4>OjvH+M9%;So9sBn=JbVsS0y8o z%A4f^%i5^U^JtpUI159;F<)JKd%^rcu> z(#4V_-A}n6(2UQ&ih1rM4#ha)^Lfbsf1g?`>^e!r!h%F3@v~SyX0eddgmeXyBpiWX zGRe<)s$5ggik!Jvmv2l(1RNXKa1;E8o6fK^c~qU-#K2&jkcvP!cppb8oiVVGWojYj zR(YCjyQiT<#NWjV6wqd04?SGb#ZOXF+>kh)PSlCY-m^nhwvBy^G!w{JqjF*~e;@Wq zl(Y@dW5k!L97x4_cGiOU4kLcov@uVLDzFZ4P-$VOj1@lJ9xySi`Gg&wY)6FAw2=%W zcS%z*;~3&uk{!tIVj+N8@ik&HUi!tY$7P#&Q*d0J_*2f-rI;4CT$j5`i}x$5?4`T_ zy6oww-ym8C&x&;roe6x!afVFwt!Qj10c z?R`#J4M1HM2{sXHF}N9!`+{T_*NOkK9E|$+yi*{19~y{U(5v7GhAQiXQ3v8WAZ}Im zuTg(cx=Dx_5Z)MqSN`DWb3e_BByPUAH3qq-u_ObGByfmOL*;UO_Ai!~0@n1Gq1poz z0y8w1@k9a?mv=%0E`N{QMh?FFSM+7!ZlqgQJ@oRzhjDftjNzPSfCNa4KqJ+-1*@fw z9vRR6`sGV%X{IKd-Mw^GQPnJxMSdhJ+`!EcB z3PdI_U^3Y`i+_n*H*iK8#5&y^z_QlHyhp4f;zucqU$`EHisP-zi=9rlNB-E}^VnDa zaz0U2lmuFvNr2QRV;rbx7V&V0M?W5X%ybA>@z+@eyF+Bc?d{RAjQ4F_x5)j0&5q*p zb-E7R1w=~$NNuL;NIICm{hj)$HO;5jON+Kbdk_&X1O}=X&g4iy%cYoFMj~q<@H{YM!HHGHUryCIpNbT3eouRi)UJ; zh_O|gErf)t9Y)f?R160s!m&^(+)5@8rYnU|B9H&*EC|(f{4f8=QP@Ai9(x;V(fd@^OA23CU;|14~MGp=Ge>zX*jF+ki)+kpD0!Zahy&2?#HG;FMoi$ zjKM*!F}aY9lZB1wG8l7i1Sdq2)jck-$7GV`a3WxiQ#{b;c^1lVHCrAb|3^xCzoEZ+ z;WWc5nB!s3LADA4&XPE~-^aWr=z+z3-VfxeiYht@LmkBSNgS3H6jlbXxPK`e#~6XE ziovcYp$Nte1ZIR$j0Tg^K3x+NqY72jqrOE7MfHmH-I53I&|LkT`@!+M@;H&^R zQTQZO`|)n>LL$56yu2*)Aq!L(w?|%hvJR3jS%;SB{Fuma&IPkw!!xp_tCfDiLS|%; z=y?gnvM)Cr86|g|hjgqF7JqbXU_Xh>;$hbWKP(<@n#JR^kAEnzSjEYl)o=k>4V1;i zhKWpBC<{`~$;gJHi?>Nq!^LDj+%chaxe{K0fh++n-5TpnWM1~IOa3>FPQaGHD1c!UH zN+I2V{Qo&EK`(y+5vdD+E3~T z3!nE{2!nh6#tK7~b7q(Aphr37*xMJ5caY6tWZ!Bjcwl*Q!%;l3XG$fb1D|OQFPCgX z#dv4UIvK#BHHLhYNq@-0_pTkk9BcR@`}3Yh#L1cBZK7i+Wd90|=^#v^i?cD)i_-*5 zxNiCi0RiV^&bdSa#EPFN_}tV!a0Cg{P%qJu-PqglM}Z1wCnH0LA$SO5RybWFOe9wl z3BIEE;59!TsrCfw^b~;C{6d_>BtI})W`Ea@~?I}#9#b~ zxGRXe@>e+kVzXZ{2w^B0Lt-1F>##IoE%)qq<=c6I+S>0nT!3@PXa;+3?#1yiLNoP*QD#E8V;&K{g0Q3>A?5$s2a`oXN$2 zR5>{hlP;L(q_G6594#g$HjB;>-It1tc1J-q~c{i^a6mmeMH28$e zQ%4nOf%R$@;R_^nwfKS>LHU|f+&#XW29`3$it-8bhQkp3HC2~C2`v>bJGG&*c+5;1p1z-dg0yHz1A%p@cf5lfz zZ`(K!zUNo?(uJH@iulq4+a8?VBxsQ?iq-`Rbc;Z->6oo7rIK8ySnPk_p@x(tS$5N2 z_C#`qn)&7%lA7^H_alFFed3$(Vs>(V6_8Qv#Q_aQv*m~|;)P)_3Iz8k4@a}b=%aIf zK6V8a&J>3iIFt`bQA(Wu?F;|qf8^EV!z7zzuplV@ef(+m3Ou=lc(Dl3+I4Yp`#Px# z^>}-?QR!l5VLHM2OEbPQ;|DWl@XsTUIlup78ZS*rVN$ZP;Kh1Bl-Hm0E?i&Ev;f7qqWcUDP> zlL}nblVtI2EVz?odTW*38*#qGG0Q8Q6!Pmv6)L;$eZMJXVJtm7q{^)sc8SlukaL)D zk8?xxJgE=}0Z+LeBWIpxWmRnEl@ZrwL0)ZuSd^uztbkz3!9}(}nJRHxlCJa8e47~A zGC@gXBq3nbxCfYJFlU6ff2r-9gaLx%$m5LIFFXvF`!R$oy5Y|1SYiS(!WcS>dg&u} z5U=yXNMtJSO#)C=t;R&Bq+Un`tQ5GjvNm5&#}RdAH`9y369dP9u;$2Y6ZkIk)IG43cP=;C+$0IS_Xz?cxro*VQPmG%K~^cnwLO>rU70c1`w$K3?<#z zPzJa%JCitRC0T#o;?Ec!$S#0tUUob7o_CiBFOK0Ae?PU5`d&=QQ$VOAAXE>; zV^{ktAhHZU{^)<|IA*7xPp$nk9O$X)tv`3x$9}Ah8`5>N=iWVHs}GqTutk2s7VFy5 zDFF&xq8Z&Mq8qsSd3U_ewsv^X-2vW>L-rJh;s93`$j{_3IABXW!B!tvc)%8Wx~<`R z^rgtR9n*g2f3ob<&&`vur#9EAi%Tm$x$gf5=lJcWPr@?K(_>f~^)MDt4`2y?^wQW$ z{c49uhc(gmc0?OQa^2gt^L>bg*1w{Z)nzKz(gv)p*gwXeWovV?qq^^{m27t?w0~PF zI;8T4shIAdWYC_Y(%<`cED5hXS*t+I*Qhph72n9vpN3H6I(+H~{?uSlUyB;te zK({GKyB~Oa`Q~Q&$K{ZZka~hQMfMJLg-)O|S{TQ58Ksr;Bu!CU4F>1ZAo3!}d8f$x zVapf$e+_1(%gKn@qL?}IFbzn zrggY}ytpklcZYF?Q1*pl*E!Y2|D|ni09>9)(`LXip|R+9kESNY5KLWaNbc0O6<}XU zRp4sBtI(UV8Q7$Yoy`z!hK2;lngRx}!y}<5e<1n}pto{aBd0!gvlQF21E4ti z*o_e{;$om11LGLe6!aGGQAr(EZQ_X3z@&AnLW>7s6EC@0Q#rRJKI6}katOPuX$ zf4uYznoGFE-3}{9B9%UC>!$H*^$>auq}ndbb`f=Bq!YR*4|NjV{07}_uGTCTO7o=8 zAgf-(K`OiL2OL)6@fhA|&b)}3r;1R~*>yQP`40!4e3ziw0~7)?HkSb#0~7)>IF~_! 
z11W!{Sy^-AHWGfHU!kWBRk2_k1P`bDAUl~Ur<~cT*i-v3Nx5JVvbd2*EsvS#-)}b> zq)3_ecx#`4hH!L$eN%k)G~=@m@3fm>4vb=bjFI21pL&MHARsMfB&DT}A8~ykP7va{<|GfY8{Jn_bdX{SK zUgEQbluShajr)z?rPJXq88I#dY<8O}c>ck^H*saGn3l5Jyl#tClb6MO5hiJn+VSv~G(zP)&81Co=GdOp(!#AaOy5+S=Ar z!izLwN_Ap=sIDGivnidnGFoto!O5!J?M-Eg zaB$(IY74)T?`#dwBti$v;R&#z9MBO#2{XUBTu))3qyWTwvwl&XDqtf zE`rrE(m@!rBvPIweT*mzmFhttKx$>&Bm(zs+R86B-QGT!!Q9k~yhZ~MU}c+pg+{lb zvpeH05DCvY9jbsqQ#`d>Q_+7ySrLmOW*U$jq1G1lVFw0M_K*l8H;v~8w?jRv7AiTf`K&jOCM2@Ff=^o5K5XH!?lc} z;0ZI6*>C(wviBJ69XEgHUS0tiFj2{AtiwA}FeorinxTiwG&%lc+NYh*Qt*(&BJHek z()>gsS;7Y>EC7?>5{EGO!iiiG`ID!9Ad>$%ow8t_^cP}4LB{sAZL@Om+j5?W;2wCp zU1Dgx^7>6e7N0n6Q8os{35A}KTl>iKf?h$bJn7!Qo>prT4Yhx&1jk4)RTsr93i~Wo zOu7vdy7!31BVvD8Oy0$z414{qA!D7=fICIS#l)Qv@Yi2??VSAj$LrUeTr(+qbSOxJ z^vBo=gbyp32gU&;f_1qCwQWlj@S6uMFTDN~)K;eo{D5|Tb-TPfqcr0De!477HCDzQ zSM|zl?IiahxP^aE&nr*LWMA{l)|9`DUp!W9jwJ2STp~yp_)1@>cjNhc05!#E7`ap| zpmdZ9Qn7>($>U?G^Rum2P=hvQ^-$Pv;1*IlG5?V@?s{7J$$^A{-bpff+~t<;IGjEs z3U}Rn{3HhC{qM_3i z)9vTG;d|F7;|W?&s&gKkpA)^jM2mijopojHK)~ylB#6F)%=Q0|%;&;Xhkx!*E+%7s zDEIy*qNiW$$TvJ@_WSMO_{{aIIymKFsId|7Q37KJ{PpFMV#phCU%r9mlOsJ-Cr3|x zexaDcniGFQu}YJ!=5%2@^tD--Vqxy+RoPaxr8USPv{+bEAzHX-{6V|5PsV3Tw_qmA zDC`<}_&=3Vnni>3Qqb)_@!c-3&S>GWZMt?XVKUL<)aJS^2shBz! zB?c_Gx&!vOFpdk~a7S7FMO0bP&m`u7h{nzLUA%wCeXdi&-`d(fI}y5erdeP&l9a{+c*6UhMqI!--$O~e{eaNDW-nCecsVo7hp z@4*XSPOxCSB2~u{#D7vyQOdYd&p#>n)Lw7%r;YQa?V($tLSf#0K9Tr^#Q63Sy+Jx} zIF<$+3OxGN3y3Z@QT+MWugjutsul`>u8n`7A~+cp!K7&DKMr7tJDO8Z0^M8LbtG8u zZ1a`rxBGHvWyq9Z#T7G)ms?EI8+t;9$|k)b>4Fe?&A~z8J$7RTc0(>lyXqWR@kWNPedHh2}20PnEiy#i1I7NqFIC7>pL^>jF$L;h~DC-h+i5 zO60y55o;7`NI{WgaeU&0^i{?+VTOinUNB-mOkpn8O>BA3uZ*Cvy%Mr|-3SjggyQGqUk&`VWI6U=V$zT_>Tt1AFn&_bYo2F>K^&37Z z-AuRXjO!jwj0jW354h3si_ov%zh~f+vUi}a^>R^>h*cN7Z8q+}z_KKZBp9xSi}Ak_ zyjI9)wCk7qcmD;MKVC8lWp0zz>kk4kGm}xk6qg!c1Uwob2|m*1Ag6K09y`v&%Au2F zJPZZf98;u9LYHu01SEebkfT_{fg6nGS0m|35r)AiP`+^ea5P_x-jB1w1pcMjn<`h0 z3m%@^RaM^AJl(gHw9GB#*S(MP6HqjjQp74?VaL7w%Ouasl;Brt+aJC%fT%Rt$e{$vU<*n(V&U?@mv7 z_U&6=Zij)K7?Arn5nV^buI%S(K;uu_0%>(sY29IcBwSswC*Lpjtd3p@z7F9J7a+s}&>QtLb6 z*xb-00GL_mFHFm`+SEKvD$94R3tc5MDGlc}ybVA}PM<^$PjFQgT^Y@q}sm3pG@F^F#ddf@Z z<4|0qKu9-iWC�f%q+apL$Y>P+~G%Aw8Q6JSo5HI<2ymDXYsu-)-vZ=Y_c!{(`~n zwKkjWuzP>PE95$U{l@7m=Wl1PA34;>c9xhb`?6)$tRv^>P2k&Q(Tv{-*|V*?2f=r; zh2I-dw)`P2_n4a0b#~pspvFqXwcY6pJDyBBhtbZDC!ND(=MSXfez#gTCR=yRu3JCW zt051DJndutqX^=$UGtj^5kjf4NpA7vX}grdi+F!l>$EHumP}yjE1#fY;6%z$3MY)4 z({uz^Pn{;Yc8BZ<0;+z?yQKO=U@?}{YR+Tim6Q(Gn4CtP^N(mMFeINo*s8>l< zYYh2vva~S*IC#P6(0~L$;!~b(_=^dhfI-}Fr6-)A1)r>7I6Ir|MRGhpogMNT#Yo{Q z3wD1TOcW|Cc*bC~vxT0ArMZC(;3@l?eQhpCyhwSDdkb*NX4MfvxK)a%2!j^6W9X0v9QJ*o!tlplXVbuO5y()hb=w$S zF=43<-2OB#ZLIy3r$u;baLL!wVqcaVpLhR`pwLR<1pV)#QdgMhM4 zRn~}2E1eiBK3Gr=zP2~a_TlnEuc?1wT=X1`#+H^Q1t%An+k=1~7CHy}AgK#pTV=%9 zisz$`S&smS3(IP#e_O-enhBXzP-tygQ0IW%kfKyGIAX5{7C^tqYkWpx56HTKV0+49 zL7gamS~F<@5wbnzs>(3wIK`;<_P4Nwy=bol7>tJ+*;!$u0L^B%tEI{U(p`U+1CY6X zJg(Oe`T(6lx{WM>B2zFVG}b&)5^$2FW(_cJbtJ1*o z$(WM_#7h6U&ME>Vpwi`lWKI)MHqZ%Fx~=II>e!5SSz9OpB6mO?J&EPX5YLzd+|wa@ zQ$VhQ-pH7^h_Co}&?d=bavP-|3zY`lwHd0E@_D*x3oI=lhI=K<2Xm zowc2i;Vs<)>wuNJFq=|a0JK~ui!R(kO#v2U7)s>9nq@M?eVw*H283nscoRWo~41baG{3Z3>sH+5-pzGdGuzX9O0P z@vs6Zf883%cHGAC&aar8K}FL-Z$P&b9mt5*vpx1pO!E1lD1HzIn{%t8rp&7QZE?PosgCmce^8xE9(84PfBy33hbQ0h6tFO>wL=&$ zmO?O|cvKhgl`>H`;(%3I;nPN0Me9SgE}J6niZ!k8n{2=Fpe;Q!`Xb+!zaKo7`i`D$ z`G?=>Hce4{d%jeurFYt{>hq;Yqjk{|!8)E2EEB_IEP!xFa2aAsGHYcI@i>GfWy4b;s&e9IAUi3dn@vGqOg`O#vV@5~(CXbe2sCpWE$ujMzz5Dg z;LzEJJLxus=TPWZGKET^TVGaWe2Xz45ksMefEtD^IA zvooO>12P^2cX2{+QV+`UgwPHCqV~h?oJYF?$jNDEm%;mcdVg1T8=~v&xmF7IY&|;;v(Ynmw*8953wzN?=CBVf+`&ZPJ0_i#ze}S_d 
z!g`uuN!!MW-@Rn(*I8av`3G7K^jq&r9v111=Y;lBR^Uu0q^u1ywin>l2^sTPK9n)W z5=WwpqhfvUx5_Fn*T5~0a=$as3vgcEuv@&(@^1TqwmUQM_O>1z4P6QWT0|0{9M>wraKG`jLyJ2pr9(X0 zmaQvS@Et7@pI!f5m%Z8+^nn*=FEPA#nOYtFRJR>LVC?l@@96E&dOR6H+`(scv-6oG zWm+F4Q3~G@LS*~KvAFKn7&4dc0XST+SJ4hr$YI-+`)!$LU0GLXe~3~O-|o8^-?&3% z0KAmD{kGWo7|`DX0tjS{pDxY~)Do+UOUzW9#N?xWNW}siXvhy}Z%|2eY!_5ERH*3~ z8o1gPm_l{x?Qpm)j-F#6xLjRae-;rCjldrG11jKC3fIzf1v!yvtn0=2rw3i9pLZk?Yd|%O$#3VQrw|yy30_N#+xT@n*sAtBAgtcv z4O)CKrw9$XREkk^m%n}g^FQKXI)Si0_5o!$#S4hYc8cxhmy4f6*dX-@u7h0#Kh;&t zDTlO`Bu9jY^B^D<{1b#rX3~M2=kG6ndv^8R#qH`Q=){0_;%vPe|U=FY#NEOr8m+9T$L7=dj_?rF^X zf>S0ymIT%uR2*cUK@MFkCt%_FNV?lRX_FX&dDVJlVN{T3G2bOj#d>ZI zf5z!nN=oesSBmgTAO0mLG-t zBQzY;F%I0Wf@o5figenq0Ye>DYyocM3?aHmw%>0*1dTyL!%LI4iOwz$&6`cutUE6_ zSo7p_1UI){C&Pa`0kTIIy&*qIK3|r4cA1?v-kT5>9LkHMzV{f1GWsY#&qb0lf04>B zQJBxTVv?8X7732(B`U(g2eUSbqAoQyp7ojvaTNT;<#Vndll#GbY`;zkt4FuTN)Uf8 zC9H14p$@uaD5l7|eN4E9$W~^3NO*P`P#@$CWOAavzl4d54JdB{#?zpJK=fSO)B3sX z^>aIq_WyJ3+=g)^T6|JF;tRBMe;d>eBu6_PSjN73LB&w@VqEpo%L63MwX>XAup)gv ztGlmqjoWV?tw{E(|Fi5@Asi3~#tQtuBl|$xKpLLdZ5+bmyM_taDwvwUxW_IJWlClC zN3^IwQYNIu)mMhMKeq~3VYx#UE6f-8&xz^_Dii}%;FnfkN3EkUOhNO^e=X=Vkwnw? z*q(3`-4|8SWLqrkRooBB%@Bwc{#-gA9m6S&j=AaKQ|f)(7?{&faLWH$>NXO3JVb<3 zWM3#9je~tccsQe#+Sj&vtZf;sY3K_t^JZxBh5vOfiGHl#z}0TTT(>_KO;xxVAAsyL zKWB&YhJamj9q10?;{oE-f80+D%tSia@LS4sHu8_z$lI;^wl@w8y0P+5IF;qmp(7w* z%C*!}juigjQ8#Xs131gBC1HC~=4~flm^)Atlg!|WQ$zT^b!5T1b!{`fH^ZpY&?E@Q zKhWS#J2vBgQNfq7{h>Aeq<}N!Ko{)j1_15!R^kX)s|k^K;HJH>e>)`-_j@AgjDGjT zeWVfR*@=zXnvQ9f3}?N~yIUp}6d(s%Jry6Whe7Ckq-0~3d~%qkK7W{Y8^YnoTOXUIVN(0(Fb%@nWLkL}lmsLg zF%sKLV(R+1zM;wD8}RQzi>0+p3XU0#DKK;@0fm`CLD2mV5+UiBm!aAN69O|hmq8B! z6aqIfmm!1#DSwR_*>c-B@;zUnx0Ny$LL9sv@?ht1cE_oi8ap*pm6_UNB-%oSBDExy z#QFN|3lyoLH~XMsqtR$I8ht|1;$aajuAfEny}En$;;l{>TB|&c&EoDJz({4WTO_el z#-)q9?c(F|wy3whc-{1?wY7Q)BL*0M!-IX<@0!)xq<_n8$Cz!y-+EYhI1oWuw2bgI ze$DdZ@2{^`f8G7-#an9@smjwxqcA3qRT?J?r>xCbl%JSvDs-x%EMBZfWIFxpf?0n$ z%I5v9d=xR-61gnaHZm%6da>3{IhXL#y4aU^K9j%_k4FKCzphNSELH$LGU5{-WK$fw zG7uAgfPaub5p{llsARhQ)|YkXo4Vt1bI&yWZl#w(&-&!~kvbJgt5X%n`Bc`sU5O;- zO!+3Yq*|PLf4R!^^6uvHipL}z0$tUt=wvjSo(CE91Mi}5{WpdWt?Y8#^s-OKGxtr4 zdLzBgiONQ!Nl$p|I$rv|V-z4z6Rj@=!5RdTLw|d$OIi4ZRKpx(U4^>a%kuSC-^s)t z5yz_chstjXAnHN!JoRT3M{GkN;mC%p%TyMn;1?vWdOl}p+9(^jKm)9_TCE*e+0~<1 zRHBvEXBM?-l=*N!`Y zj0_#%G;;eSVN+!i!X0AwQP9<)*qrI;^ zDD{auGTIJ=C&y&|TOQGMj3pz#LrlRRz6eshu@I(`sp8ppGPuFEQh9>P(d;CHI54L@_fCu{d6KJ7!k% z(1VTzlqeiSe63_ha$cH4|NM%$Mt=~+r>g8QB%#}dm48Os;iYx`i0bJ;&HiZ74)IfY zT!#>4EcjAB*pxzc-$LvmNX*GDZ;#--w}+zb1o2)n2*R)69OWk1sa<}2{Z4C2)_F>s zg?Mx3%gjC2Lpnkjb2ToF(=czA6!4Ce0VJulOic0vK?}-YO?EuN<_$BO27eREabI`D zG&^ldCXGxpVw#;Xg)FpvD21J`=e$EMGFv{NLtY@0jCV$)%;xjb;{DkEat5+;S5^|0R!wA>BKW2m{gr`n%2FT z(rVc=@TIVTb&4dz;8jV;N9R}cbMyH#$tvaKU=}<)6o-?U!uW{-TYpQHkCk)&g96)e zft?Qpwvrg)tV&Ze^;W(c$gDNgy$pqm0r0`|O?Eu|d(uJd5GkDl>#v2$uIyw!D4VL4 zjBS=C?we!#ba)$A%5=NY@+~H$rsbqq78`V(9gjxT6r%EQW%05-a+Ib_>O%tcg6T@A z`nKI9e~vP4TlB)l3x7?}NqsTvkvnn*>muO+fbal_c#Nm!V<1eBrNTFaD_mTkEv5j) z;MRZfTdKog@F}!E6Cp5Q1`eo&8R0QuV`S$-KIa|byZ2Qk4Uy&mhuZ4u3SQaWjQu4s9+E^>a3j zB8V8(Ur0EXhb%YPc20#K?on*6n4!^@Lh~DiVbGPFc=HknE2?DiTIOPI4E<Y1vS5{g(X~1Dg0CeK;Giv=c3ZYswNb~HLXXYSA15(1AV8sk8V?+S znO4WG(DZ?rgD63>_6`BH>sx$ym}{YpGSDLh_0Bf$k;?K@^E|6e4P}2kDE-46n`BI( zxNaRAH-C=r3oy|w!&K%Y= zj9zAFtMD>o^~AuVM5QMSeuk;69gw-u=*HRDGlKEIbz)uzBCJM+$pBN#az^%)L~tYa<5cwa>wppt-Ln zs(-^5n+h5-hS5S!g)eK)Nd3dK2T#GAN|zQR&ZfuwfCw(B5qj#iIRgWbVmSN%is5Wj z3`5J2oDMLm&*iDvsXRY_FbQt~(5~}afDWZUqCkJXW#rr|58}ZQXWvY;a?ud#y5k-- zCm;OXJinnLia$>!3au@iad<^JcKq^y!hiDa^bkianH&;Mm%6jK(#b=BNfHTt>)}N? 
znLO1#GKOBR;maz_K!rb-ZCzH}@lrDu_?LoWhSy=igkLV05VA6Z^?yk> zFC2Oc$Zq(vBfk&jFqXFkm9sE4s*z!Y9MPB4BwQC_*%1La9d@=vu_`&eZu`w7-qiE=iXMv4ZeTNXRi?rCyXE6-(X2)9}K!>pgU zWRe?!b=S1qjsQILH2`nR4d_-h2Y+J-o%F^glMf&psyBm*Xy=DU5$3=_+tC|^c)f zR#d`Ko5aikn)GsAXu0HXEP&;OZ0N@P{xsKo>I+|o)$+^%P*=+HDn1FxzAD0+T?!#I zHH?c555bK<^5Ngih0bE-a?rNPRhEU=Z1M@!o4aTK1>To)lVREumrRia7XvXiHkZM7 z0V#i3*>c-P7Jb)OplV*AnkLlyk~cZ=lEj|!l$6SY#p3bU%b7(iNkWciZ8#m zfBt#NP2To#T!*L0SM?>!|Da4S$7YCAeZ5^)ao4b}`t;gWJ$9t8AHArmtAbXGIknom zp{%!+KV9O~t$%8qhNA>N+TQ3V z{P6ovFT4_0uLRFb;S6Qol!7vtkY}vs}U7CHYhjn}_<4RYx z-3lau*H+Pn@bw{W4KK7)I4B(OcFxYO&>(c$;eF@jKT*6k35_un&< zH4%WdL6InBKoW`e_O{_X|E&*I#_tOL!dF8Z$AZUa@NOtrA*2t=4i?f0(@AAalx+xl z$P!6J63OhG*7x9-_kS!-C^dg?50B0 zd`YFR`h<_4+0{?~K^1@9RH0Pj$QiYsGg0iMSR3g`;Pf9(|t(`bd6)}?TeXV&k#J6#utn&|& zQ@Y8mq#uTltOBJ{*_sU2aKCZ{XKh~e@G@{_fmL9TF5aA)$k zriC`{C~`-6LT))>dh)uWl5jC0*OU`nHjW@aS?d|(QWZ>?^B|YDfU6FWmz(lG?Uo7# zxJD}qUQ>7zNt+q)niC-%vDzh`3!<$iQkx@~Pu72T2D1*NYQoGY zysGz@rEvH&d&~|Dr}|hrlHE_oFk=U+a8WrA4{*yZoJ_wvzHX{I{tSBcc(=ekGPG^DF9JHS zaWz5LJ^>I$rwOjW4^V>1BO!3Q+VjxaaT$N#@AbJ6=q=9E7obZ=KXljw?)Yx;It0)- zR83P-R0G3t$%c8D+P8j$qU~FVFd#x5qs+Fz=wl20`elFF zR96`JUs)@EdkZtZ6VlAVtIj#!5F{h`23Qg&_C*9$689mF&hXoII2pr?9fT>EJX5xi z6djS9v%|9PmSa7rcYtqk`By|KO9aR-@?QiPPPQSJ zq1pozmr|Pq6$3IhHIq=dDu0|=bKA%ffZz2i__Rf(%;DTel1-K5wY_pAuM?3^n=KVB zVJ((6c?`Pr|((cT06?QM?|;cV~AZp(R3Iz<>W@Ojp|J^M#P41Xnvl{Oaxwr0dB zRfo77oef`e+jGQeu6v{3ds-6bID|4p3e_7u^nUHnC!5vu>-le^w`Z?L(P23?#2WD( z$HjZ)4IZlvl1RW}q<`N4qd;80VB8qNvarFlj*t`r(USWOerz}dqeL<7x45phU~ndh zGs0FOtYd(pS)B@P2*ZhSsyB$X8Xuo1p%{@;cREquALIB$2`vbx?EmpZjT@R*3Q7!B zotjq)PV}yJ{}(ZiI|B(`N@&{T6#aeq>C-v4{pI?&5tNdI3DzkA-w}UH2Bj~$b%?EF zG*a2ic|UmdG+j)Wi#0o~ z2?mGPBnE+&G&WeG_-RW}2x?1G0%3d)&Iq-qCBY!@lB5Jwo0>qwSwF2vdxB#{FtYH9 z9ArU-3#wZJ!wGkw2rECHbfim4-Vw`)VJL%$vWf_*nk0X@olVw{56ca;EawRVOYkIy zp}7NeF+aa$!-Ta|23if(ASy1h#uNLOAwInXLV(*fWmqbB@Qj}Y8N;B9m zwq!D&{AanKg5}ge00?Tt1h6CtZZPGbL28Gj89Oi#yd5zD=n_=#e6Dw;wvV$@tM4p}`M?G=d*#8gMdY5QN8qxn?xay9SKFAapr`s)Kfb1uowY zZ#;9Oq&-7}FbH`BVd6XhQ3x-LDlcf&LyO4WDe00@ZH#o5P?)x07(*GX3+o=>&8&Zz z-j|j<4!?XD6c5|el3*YNNn!@*V71swqU~u(P#FA}5;H*9Fs3MCGFN{w5W<)eBf!#@ zhU*ucSbo}(_6*04VB`@TiSvMIq~QKyK67qA|5CXNz+G|*l0c=3ZjY!-s~{zm+bz;M z!gYLo?1?`O-s~oC+f6Z>O{mN0h5&!!7dr{b?pdMw^yusi4h5mM?FfPI@0;t)vn?15 zUbiC#=GS~MBxUZlv}b70twW5UNS7FOc^jB9bCf&o<@3v;e!}KvGMoLeTbvAMk1Laz zYGD9`vW^I#*HONt6mglWzbFL0suP0h8H!y&0RqhwH3C8?>J&lvo>!H({xg3y-5Q~x zraQz4HQgmfUDKMu3>RfJA+;hx)jh=P^1Yl@QLOJLvtkI}Bz%L`?N`6a+~dF?3Pm0x z!MT&Z(=xq?xitU14Gq0T$R2zA~iMqTH-c_2=Y%^iXDOVya4&tW+| zS^QjVKb>>aFJ{~M@P6{KV3|A17%cwsO#)M1+p{JVxXwJ~io)OvJZ6yKn28pYxt<(> zAyjzGz#)8ip$Zn7DpP;rtr;3hyhDso;$33wOWe>_@7V3y6TVl0`R)y1ZlQ}3Jpm_I zYF;(y=n-byg6sZryD3)XN7H{=l~?yo|6zN-soo@pG@?Mx62-%-#QPVX<4uTxBc*RV zr$L~Ir8|D*Ip!@NgEI#&J*O!d;>^KI&*@PZ6bx7CThE6 zHs=GyRe!NrO(%i3gbVm~cvxQMTtyfTlcUs}5OKSYby0_6yHGKP zKE^1%L=fY8&vJwD)w_}4FVY5DG|(jpe!aX~7qjAivybY7z8ha(AAcG8%5 zyPax-gGM;4|GFvG+nDO8G9vy}0bN!Rmu85VNdoA>!JA*+{c`owi`0m3pTP34zr)Hd zJ{8Ys>4ROy2fIX_$Uz?tok-bl4h*dAd_Y#eH0XDN;qbo=%-kxM0W=2`0Wg!%HxmOk zF*lP?k1BsvU2obj6n*Dc^1kVP-ePAJCIJO0_p+ z)-efo+VQq1H*Whi+jdO8#=B)9er;H>l4qUY!H<95n4k(TL4Y_ePkUe-35;FAc)&%L zLadrtEZ|t4LWE5m2m(SJ1xW;?TA~ZQqmJ7~tr%GN$U!Bfe+Y!AMDzjdBd&(<(*p=L z@nGP3wqa_put^W%PIG{goC-fD+2VCIq=UIIJjumzo@|o!BKuMj9s8)(-q=ChMbF{t zIkA6gh27 z9uf?_UL}hZ%o%qlu7RXN2dWQCh*I{Zi3opXWC;lUSwL{NYGc} z#TWYNb9j{tqkncmyZ3i2UuMZ`vCV4i@i_~t!8y~cj5THc>=A08!P8)N0rO_RInipV zLc`{^JkoF;1^)DYUbTVN!Y--kIb-aM2@SohKY_BN`A#q5-7VL;kTE_5^~5K}4{GMc z@@IJZFnjRty47dz{5@=_c67Z#^!kvn7NU)w`cNZ>S!isVqXoZUPwVyjrJuBxtg$E&+ly!!Sc4$rsO7q31@zT%2WWyI?GcEx$j ztX3 
zt_lcsfH$tIoA&kPS{WH-_2H(>31L(3_GSKX`Q!DUR}wY@R@OXWsg$%oQA%{wcFkea zt#4cln*;W%4?F>uS*o??e%*g91mh-ff4$8KSEplKp;(fppa+liKHt?#6h-RQ5U_6R zGRqs_#advxx)e#&xuR+b(p?H0-G3vnCIGFQe-zan0rCfjk_PXQhQpDpg5qA zw^yLESVf4@w4K+}8V)E{1{9^3f~V&%E85MWMdhqjVxpqLr;a}5o35xU0xHHCf4!s; zt_(|4-3!~FkmhQ9LURpS!?+kUbcOhmP7HT*#KBC&rOmluQrn(R_qflq{C{@&kd8WcG+mR=Gq|rc48e0dHh6-Xe&+ ze*lR9;-tILJGH*S&mPxmQEw(@I#Gj#mbF`EMKYNVXgdcRJ}^SW2ryPJpu!##^}Y|b zpuj5MkuAJl+J(m{2FncxDe6Sp@9FypPXJfdQrKq z7=wE=^e;QFmEa;*xqxqVV7%b#Hr9DfxATXN(%b?0#?y2+dH984u9nKPz9fssql`D z&u)1?D_ATBCqU+dBH3oqqZR0IFclQi2kPhd(HcV1IT1DeI-&yAO_RIM8^`G_QqX~( zY3}SGF*zdGMwp0F2`nK{R|LYH{o>8{pFYy7cbqT!9>k1e$U4SL9c{q8PBWLrrG02U{}AD1=Bq>->EY$+9O^?m-(U13s>qXK<6HlxFfebmRt?V&7j1&RAmbU6Wi z*28Czn5qJTbz9eytD7co38kh&0$$hj%tK7zQS(TUn?q4{YfCUsiUJAK^9VKyMM}og z-7&pt9Rfj(ksncRipn8?#Q0zwAdWCgmUr)Ioj@XeqY{5)7S z9O{I*xh_O%qL*Qn1Q&mI4h|L%C6>|s_7dx@-h<+a|H+tfD+f1S4C9ai?BJsiG#3*# zQ(Vl9e}R^Y!D9!PMMW+#C76nj5pj_x!II=fJUtN}lw|QS#F5i|bA9pi1qT>#2D@1x zC09yWmgv=Hck#oI@hXFNaCneO?dslR?m#|p5RKoJt4|kyrJ{dY*pa((3H5c#ffi}r z?^(GWoMkFaGOG2g&y9*375(MSrIpe3cW>T$02o)^Zkl56gFc!Nx17b0_}5;nU`YAT z2S}J;eXh)@*my!TffguUEZZa$4YF~~Y@((aZBjKt>==E<4bw)G|V&w|rdYSE*S+TVlmn z&DBp{$1m0}etEcqpu4ckS{TEG?%hUD#rG%jpBMlA8RCBftxg%rm)0N+7FWo@?e+Tc8HThUCvqj#&Q7I)m2?!z3vf=Z*~|t@q`j zrTrH-hQ1(8hFYGwHQWfgwKcU~Qo%V(K9zpl$zgwe8zL*;AyoEl^B-)#thZog)(t!of-;{?e_my@( z@K~B}K)Y}B(i~-bVvdp?XUBdR%gw35hSac3&KREPB%KdWkZeJX0~|ec5zqpt$Q7ld zp&fsK+q&N!&kU;dei#NYKh2`!6EPH+^Rda3b6bcKiQ(t8Tn1V^vpDp?&L{RW6Iao? zAH(5#n9qqMg`gEIsiEc=-WOyoH6b|v?~(pMFOT{`EEFI!>sVeNmZbLJ^Dl=ds1zlA!cEPlzChpHTUQ1f-D z!yRv%Ja3PTF6@YxF-*&ud{3WLYEGRL4#ucmf&WF<5@b5SxFR4$F#4DDADJO4El;1p z!oioPynY&8`z(%~Z30$1IHsS;ScCK1ejua8Nykee%?dleAy}S_ngoK|FgDmX^<97C zcC^`X%wy@+!zL5QGJ;8CUf@`MSdg4?9|Ff^<&dTPcn#d!oya$qr_bM8C>$^HQ}-4` z-aoRu@--+dpSh;MD-&_XHHCtnG@V{k_(@}LK)q-<7Prho2{no1My)r<#@01%)A`A= zNy7CCqynvN^btpl#$Vu(23kVG*n@xYVU#!AJOI$!GQ#O^7*yg{S=I*F&P|jT7_ZvrwMJJBQ%mnV3djsh2sYqP^zo$n3ity~bpJepsCd6u7kMDnq@bW9* zJ9>QPQnedby5{!;fD{LoP#(g~PyBVgnolCD^^G;kZZIEMao1jiQj&27)NoiRUelk3$&rB{$h1ws!vOi z2bz|8L{Z-~0&oCuJ-S*dPyj?+AV2?~mm@q{HwN|M

dc~Gu634;2W?o)oRph~)Dv`*7Un3d4yQjrpV8YMIA+k7hND;Ff zcsU+rtw8=6uf>u{E{AP7i&#W_t7eX860=xj!}edwcBQ|~b!vYnkUwXFaes}W(e%GY zJS{8T;ARUROt=hGZ8vTIv~gw$MU*oxv#E8o>goy|mZb4;6Sx7`@}L5uZ47+_Sj`zJ zXL0fZg#g(_JZ#%4yyuztent$p-A9@!Gp>Rpbq$`)lO(uW<{*lb(qjWRUH|xgP9~nm zEGmY}p2~7gA`ySHA|E#mxIHIJ#R+E_FQ%57x*?B(gcU@fU^v#wYEL@9z~~UVM5H1} zWs&Yx!Ow(EAa2NSx}H-=#2L$TF$i0o&`7YN$iLPI@DYD4JkK3cNqT}u3TZ9(I{Pd% zXcQHs5sZg1CyLJs@1C;b3wI#@zrc+<>TG3Z95PceWlWHeB4}Dm3(t<5rB!ebRB2sfc*r7n?ER+7PEXrSr`0Mn zYOVc;HH}+@R$tp{SNkvEE>{?9t3uLh&Lxd*sdRsWtv~`#s?L?W4D7zrv}9Wv57mO{ zx^GGgcs8!)$%Ax)XMAlZ(-xYgR3>^o$Tqb5`{285w-QfyV}{ z9AIM}0Oaih!<|k%H?w(@Qx^MX+LSC8<2FkDU$K%b$;7@`pUfd?mJGYrs=`=GlH7$* zB9dTRLE`~v^3zA3yWE73f|QHOw#U*gFdcuc6&Q&=?VCxQ9XJA~4Q5O0;|#=tzG#{b z4x3Lr!hOe!6m-0R7^v`+#W6AAvI6gJ=srH)?l)e!bnn0EFA7q#b_zq58_u_l_QaY- z;AZq3UI>pv3}5?Duc!&|P=Ijb^SPcwBbV&G_}EwV>O_r9Q$B24L18Ry>#{JW9G8EE z@g8*2P+Hy~pYicCjIXbN4ZM1asNhs=M8tTOyC;i`I9+gg`8_sUp@e4;0nm|*jTVTR zOp3$JVfymzU+VvXcXd*bV-TKGeF?QxNk!% z$1(peUX3}tFkj$QctXrM72wIRtQLQmApBLB{!k5LtGcE|3{=-(#0BJ2J?xyvSz559 z7-RhM4m7>3j5W-Dv9q>>tjeNPOu-H>lcd&&buNN`xO&yYy)xTB1cc0?GGeJg3L^kN z?-bSwTp5BfG#;mAgZ56Tc%N7f8F(pfDVac`pN6L#o{X+=@ZzB4@SJ}dC0IOR zt^$y>Pr=`|R$j0o9vk+(mEa}&-aY0|dnSc6F2|!r+Y=80k!A-Ef-S{7XfXdeO;eUf zFKE3ZmW^t<9wWkc8@-$|$P&d70G!=id*ZtspFe5U7lyh;hkZVu)rWTVTEM1F=|cjH$_DR72p!SPYM-yBFn zOMy=$9?8fBB(6Op?vWs9)%IBH?X2+}b_eSXrn|bmxW4zeI17zk zeP815H{2H1TTiUpoo+U^yS)XEI5S`O!a2+8+UWn=Z#RwA^?2_=yi8L7PX&{C4DlW_ z0pad0=5aDP^!DM^@5n`N-IoD02NailvIGw`^V;^^ncFoh2M1e&EmJKZ#lclkB=Tk&fN4Jl^&%e6@uBY27++m1rY?g z59)2TQm87@V^x+A$r$9vX{~nIUuw)E=Ol`gUM;Kge^FbMh=bBOQ%n*LE1lNIkVaKu zwy!4vi(()xMXD`$5Wn=BpL4VzRvdB%j?-<`W8%#d6IxW zk9+;KE^}4Cv5Es^`|nHK2HXZD;*hJDbkJPom=r>W17Ytmc*!(B<4Kn=8=ACB*mC@m zEFx(fNF;_t+2~Gh ztbwPon@a{k6sK7wlOT?$i>W(Ex6R;+UfVCAe`-Q~lq2Y1YH;>glqKrk0|2G3@PbrI zk_dWwLx}V~s}+Qrv$%^BcYCP+w1@49uU1VjAN+u^t%TA=1ff2b{19R)PvUTk7DnO_ zOQ2RdAf=-gAX*MJf#Tmp$+&wIiXlk{ zfB&Yfsi&o0W#tq$kGYtJP#gS9tilOP#E8+NT9s`+$}tpbRq|mIkPA3sC~6YlKUOZ zYB*(QS9-Pvk$P%WWeUVEE;Y(D2GuO%x}`I(3^1-33V8|pMw^0=u!9y>1KeMPDVD>D zKLFiE?QDo4c|92cI+FO5j37mDf2AI}5SWdvM=^NMMRbOKl9aG;cqNuA{V>{oL?jNA zV`hj$|7K=`y1brU;#qerq;iJi!r-@7f42oNJG>==LVKrrzs`gZ9)32^@7;X%_GV6+ zr{)asB`{Jn2K){$?um4Lkv6JhDjTCm`=gKf$n=xCe@aYKw-CrxSL}9Ce^;or)o26_ zY81LA?1&3vjQy7(3GY3GY1d~ENd(p`X~7`sDJaL~?t-?oV|qlKi9g;3$dK?uf{_=k z)x0pxsRHGeqzL4IT9NdO%qB4%Y4^<=EGZq(~;vpTuAOHzA->hhw@HgZh|zxJuW0 zxR!)orlNhy#fjQ{`RYHzn#)F)K{N>!lM%8L1UNY~ zAd}GrD1U8LOK;ma5WeSE=tu#zBI;q;y~R!&I2$h(u)9F9*#jdpHerg?QdHdh`<>y? 
zL0tHxdGVX~C{OlDo;=*-@w?dG+NA@?=s>vtm{g%qpD}V0%i7 zhS^c?kS&>3$F!I+Nu0N2H~b3zYwa_-L^&T=Q0e`YxE3Jhg4ly_O)gx*c0O2HQzL~u;QbMI1 zRUYD8nZYYRg3L>jh={rr&d9xrZ`V?58$J9m>MabCyV})31>f%w)JD>st&_?LWN5%vi#0LVst~Z6^H>xZpeE{PI^!URB=!W1SU;Aeycg*DO%0#od4IKz zl&zDOIm875r&yt?DrowU_%waU4spO+^_=sn4PlZdM2JLigG3M7IO6*KY&3-Kz@l>vgyv7yz{%r9KPLaFgnK zeu*CzYlbMZctDU9da%@la7r5LV}BD)3F_l1LDA;2-!Eba6$oa3!?a3R53L5!a+9vpzIC~7E72>9TA|23pDDeER{X4Pdvx3|MNi0m!;&y48g zwQI`jGHd4ZGj>=(nEX7>4G;&Z`?y0n>m+?o3N{`n&fd}*7Rf0)4y1Y+RevZVkZKs9 zd1Qx^5@J4kyoebNNVvXp&RQF4v6IA}F_0#&NPQYmWU;Ve6p;~H6JcNJyBWj9l$&0- zK~jn+4=Dy8K04?{i_JWyHQeOAf@hHUm7P(B=IW!F@Cbes*v#=)7`2bzVZvv1R?Q1& z*1XK76(kBis%TfDhmKa1c&5lW_9>P3X(Zhd1+gN=~E^k<{_z;b|ge zC{m~R=(JvnW-;;!m5ug=!Z2t~j`V?92X3sKRL+>tH>d$$$)0V5^)hK6uOb zMfnId6F9VLrZVVYL?tS?pGIk58(WsHWZSMOY_%yJx7KxEp-ER%8}9MW75k#Weuhh* zXd@D(Co`#pjx*lc&Na3x>I!CqZ~K?w=FmBv9Yiuo$27VLT~eTMjk_d zCQT^9GzDzPZxl*Boy8%T-`w0_Ert&}*Hko}EqU;8r*LL5UicHc*=?>ONGwNuthq|FxcCF4YLV2;|)%hordxER7yNgr92u+#lcm*+xK|V z{>*83RpZ$O>%w(-aNnkY{#&+3M8zouXTym`knWW8K2dG#uq7?GKf7Oz5KbUINY z(|%$98r!1*+`-|InKZA6uT?@MPgHtLa$qB@)yOz_@PkX+b!}bw-WBzlV7Xp@7G-IN z?hk`;-MFH$zL!oLyRZE4Z&M=ohA^1OOqeV>Mfzb2P$p>XMq#g|aVYa%5~1R)&-4^xbuH6FmWD%MC&{JwE}I-I)caE(cZM`Cz6;ps}Y z6q4QkVm{yCTCn+8+E&!fX8zBA4zX`!@(TDUG!aMwEDz}vXMle5ytPe^auH{K=Rhjv zy$(?=$`bb0vVHw@=q3?q#aJvyDTH4X4@AGMp6X^d$b?bBuSMGfDlpBTcIK%Mt5<1u z3IlyHLNb|(#AHWtgw&VGh1BM|Op?C{pPXZ55}{;#hSgQQ4YSwkoWVYSky~*JB&xR# z&o=`ZVvMMpg6yTp^O69=uA<`7aCLaA3fZGxK$>AFG9t4Qr!gD%Hl1-#Noo#QIMggT7jPhLdX z_gH#(KlJ4{wur(oI|o{Snpni37mT#_NMBn7>}!~epq+5oYsD8JmfpM=XPHz$8FN7E zx@dU&DBm48h@)WXx-IE>-f{;J5i1~+q~^A19S__U432HMz53R6Zr6^=IMj8L4D({t zhp`M+w~OUle?qAiDm=R>@3%#1b64)Sh4s)~D~23ZU)UDI84Y)~;_HH1?cU=87Q;186O@~JHYQh1 z(QS8xuwmX&g^RY>?MvtDgJY;4978>~-C*4{{krQLC%BE0d^Dlw(cmWr`1o6t-NUy$ z(6=t<0S-wObX&)N9gnQT{QvBYS~l8I!o%XtJ!SFiAsn2HKu}j0cTqx}Sw%lAU z@T?#17MI-S@Px{$FFLoP4P6ZCdx=Mc*N-Hf2FX~kaqE77KXL#V$NCIq9C2AXLd1_+ z#D*$N3=K6P-+#jZp!QWISN=l0_7W@=%o{11`W1>Khru2L#9 zmypY(VVb2zrfDkA1S?f=|D>lecF-&HW4%f;iKml5CU+!PGpWNYxN4kb{YI;SwOGfb zp@M^-xzg=_9B1G>LUDob3u+;outfU6d_T^^fmov=87Cst(V$g#l)|!3f@hQQnPW-k zX);L6ra-i&^P^8(!u%#-`i@;3NeUN7d~|W7u=}NL1lK^?3}TWdVQ>do+HjG{SfnO7 zyU6GifwSO6N(m`eUUYTCp^$-tdt^qt#o)%~qZclJtWyGq704U6pNPbeq7ROKhO?l! 
zeja+3;B_HJ1m=jdk7d1S1)K`?y-X>Adu|22$Lig@q_z3Ck__g!*LkSsp`4QmQvgOb zGi)LWrapxn59Tpy%m!A>iVEuxvwxok9xmLUqzEL(3z`@iceI;v2p1BdF_R zcB3zqVp=3AErQ>7Pidn?mc^$h3VD;pOWr_#M&s6QhoJfAIzHQy1XEQ>H=YcBu9;`l zzs7fa&UF^tAf`SFjGs|5eGka+l*oI=~~ydB?;;NX-0VClfVwU#L`l*Z8XO*?%UY^lVG7pE~n*(m+BF zs{!@Ll|@FR+DvA#5s?{)sm#eoHxF+H{s-AJ#RZq4+5-~H#7K44m$iowp1V?7ohcfHT2Yl|)tF{23WA1_tnlqwCUwQPv}W1WRMm zXCfv@((MFviF7G=4B6>+uuUkjEOg|ZG7qaTRT-f{Y^-#N3`f;!z*a0J54d2Exc&%p zrlK;!!dyfpOTj>Fl?*zHl$0*BiR_gD7$e52T4*mOR`#-BRPv8D5Zz=yrg1 zc2b{ZlYP+@Aju)>`9vm1PuRF%0GophBT<_%pr7Z5!7XyiKrKT@Iad&WLaRB$zye$3 zLe*qgT0{*Brz;NoLSPJHMRYR7>%LS+5&T1%;-Nll3o>FqI64|MXOr>F937eF48D`V zp)>C=fpBp#`TqMs^Ye6ab>7a*2h;p~dS;sU?O!u}ZVV83=e zyVDSmbp4=t*WOL;r&sNN-8?MRXa8xhN5hlJU*?0u15xG+UErYM6sti9GgZRl@pyvW zKFouS^7G`_N-${7?=NR{{?lmuY0#WZrq}JXZtgFdx6O~u8%(@$7s}5SB=J#fR%66$ zGkg-Ez~+Ft6zjQw?zrmZ+%!K--cL+(YQDJ|&071@{pFV|->P1J7L1%?Hw}qZb-(@o zUqmkKg?IpiScSv*{^sW5E7S~JizP>^)Q|Q7{30BC%|6W;EH0TJCK_GC!vubl7e_4{ zcyF7|*RR@?+-^2q)&r3Is0UZ79y+!5f;gqzA(QNnQ<|HiX0w9;tGamvEYhVd001Cj z8G#C!%ocvZ^ebm!6`QQ!$D zh)52cpewK`Ypv~Vtw(*aCGNyXMFjIM-m*gdQr@zXT=zy+bREdb`y-3}1G1=BWcdJO zX&r7^i&M4&s|si29ocNox1&Ek4sZK|6>4{@y`vCYZGMpKY)r}*+eps<{MX^^V>@1l zRoOYvylfFCA%6woQYQ*3c>Z%FvlQ+%NHN0xqL-VV8Q@G9M)OFn9>m zsoy{yqeX{80BrVpi-ou?WC-8%T%`VWa`V5Fb~^okay}bQKl#{WDUi^Vc)bC07KJ>y zU{$A-P#O5NnWK~Y>8Fpw>GjM@kIY1}*+oNW@u7I_?0o`La4|2>FmwF(@cQKb=Kku3 z(amK$o%O1q!Zj)^cp{WeUN0Nf?ejCY4R0n$X|D9C9He!$sKfvb@JY_woC6pOwuJmV zcuw1Y&+XNyLg?-A_I5J9emDI8@OC&JU40tf`t_17#a#m9I`tueapOgZ^6+p0a~3jU z&7$=@ql^tgR~bmi5A<-yf;LgF>JExRx#QL$%l=U)g+-{z{|+ARA#&_9_#l2-z0MW@ zh9cSyO!cN#Sik$j&w)iG*kaa;Yp?}JqrPi_OH9k?jE)x(@Mr$NtybBET*oUyW)F2G$ZWR zFJ$#4-@Z!GmxhPoVMB0vQb+V6dg>5;%llS#A? zHou8@?4jBE)z=YolM5Tt;qk`d$(Z`wp;sx0}V6HRSyc#dinwE4r0n zFCDt!19bC3cV;U}*;`uj9nzNlecvyxD0l(h?jW!!dHctzT9Os^DmpZ#<+QU}AH@Lg zcYN`0AgOVrIX*h7Cp5>^?VI!Fmv=wv-!~s;vp>IUn!EOLcsCohWBYk@bJN~`PG)vI zylubTP;7ji!MQ2nr7m0jCmo64w^V*Z%rC%S1rzOZ{ntU=kE896whrH-1AL3(E^(Z? 
zn*O9$HM}h~2lRT|QpfBp;`o-t!CBa5OKNR4<*5}nQ%5vdA#F(=JX}xWmej<~mbk6D zeM&Z!t-!aE0aN||rqWd_Z~Zm~{vW>hy4RNhGzSy`GnX-20uux>F*7%pQCkHme^^;_ z+cpq>&#%y1sTqp`z&mg5v~iE5?KGb0L(&I`gd~i36(sGrzrK5bIvToDKNySM1-Mvz zdq84!Sc%oc8}Z8c?cm9HRg>^71Hn zw&QXA=i^Uc-$q7ZlCTA)?WV(Vy@?VT{B2w}FgYIe$dvVCuh%LMUS@gcM}0!J;hC*6JGu*C zvxPv=-oRB^@WLFj6h_U{*Vx7Ut&HQWJHjcGOunCu)JCP4@tb<339^9UBqBEy7X zusKUttqSr0sq!#;0O@Bsw(~28v2z$(qzncxL4hy0JAu6!Kv27pe<|DYh}U9E>$zpu z^wX#jgm{Crjbn)KNd;)$1NG>ggOUSc(9uBo_dV+N1Ljy*}BoI4ibTuAtrMgEyv1gd$ z%rfX+IB?<&vlor$e*i*y#xCYQWR%)#v5RIkV?5jXkP%qg&k_YH_vvM7v`6Re;PBzlf6=3_~;K4h|?@k zLQ1exhCF>FPy)ee5h=o0iq54iff^>q_w&MY2pC(|e&MWyQAP8H0&2koEOn{mcnSYBz7p)*#|-t#`F&!zmWbxhhoHf5D7Cl>&L?jW|v~an``D#p^PG z$%v{F(3C5X@aYtj6t+%;m_;-UBRe{iK7nNF-*rFps6x>R+<^$iQYsr&%iT#9rcnZ)mSIxF-rYZ&US?mpTNa53eu9st zaDGr}jf07d@&Z!5?vLNTzyHWWC_DfasHx>Z8U^%=>l}{2)u+XNjWwO=0ee{cMmSg& ze@V>kY~*S-Z3$d(*5~Kvu(4xks%gvKPHt}<9F(bS_V#BHiKR=)!#Gc7C4MsC$+qVs zU8v0mI2T9bMoJkfscx-v2J@o{dk(_=|8q8e^Fhk5;Vp}Z{H-$K{mnGN6p2~*U~3=3 zZd{IltO$!Jo{#y!uX}e^GZLeDOlYds@;SCVo5#Zsqw5=_aBCmdUH=h~vmd2lZvYL0lanH&>?ak5-?#IkbO%f{nqg(=X01 zLa1}MdF35~(sd0-xqrL(_ng64-k%?K<}G$6l94{J9@P#anvPn->rI-D(rHf3e*|ZB zH~RPIVq@VS+@Kl@U)5BMIRziLz0c4r#mhk%_sRqd;QEdjT%nS}gCzFl0p*v{xlrMK zfr3t;^WSoM#7c$zM#~ZmYUXg&1Xc+fax^0w3kPx=UxkeGD{H&QP91f;1D6VizBB}= z8_#vg&@S*^^+l^?DSs`x^w&7?f399%-c_WU@uiBEdg6Cg#2sKpHQzc`8UPLaF5{Om^0+ery@P=m*R|z4M{tHq29OJ3 zMXs*zl;?CY@OGvb8FI)57`g8NqPKNTKhRuDuQK-C>%V?@_XzB)pmRLGf3YEL(<=ld z!{y}-v&zob390_7n?MYFUf zwv_lK6-`wRP7W9nnqh{^go`Ct{`~6&u9X*yj^e64BHSE=3RMDYe1C0!anEKV4#kLsdHD!aJ@re zzw(kg5qq>nESH761S5aoSmed;zKpV=>^%myhoa-!ZYD)@WX$AwYrD?6AwBTNa+QyP zGS+)LtpM1!WwoEmrf}Ag8W-Zq6G>l|n`6C}usO7zYVOJhQx-ihNx+V^K+qE%%F@t) zI06GQQ#W;yArZ0J+S6u%f^Z;bRMQs?uHXlvdBjpX_A^Fo1k7gay1q48&uG@bnKPW( zT~1{^x8O*2TmX!|q>P+3=8v@S6FdR7JFq)x82j=+O=j!t8#gP{x5YEp;bbi?A;tV8 z9(R&O7q%^n+L{)}Xr!mzwFZ5jnkV3jVsvv(`lXE-?XJ8@BBiF_Qcd^wpD>nt$X5M7{VthX8N zaKK{~e+^72$y%g-5*>dkrD;h9-xsDr^E3mg0&T%C5Ohfs&EwgVOp6*;0mSFW)&Sjy ztREOdXEit!mshpd+Q4JQ+KCiiwDr6G-5E1C4({-K?w>15A#(&d3pS z*EtG6_bC9Nlg(muEP^GZQ0@JxdwX@2H-*=<`zy#Nq}1`1^wK!-lQfBCn#u&%nR1WSCea@7 ze+7GbzSO+#m!E%zT@w9qIr7T}r;E?vdvTfy7&09mAem?>uCXf-6xq-jh|g#Ov^!2Z z+Z|hzuDMB!#0vQaRx0ycRK|@iYPLfddyx(=jgecYt~#F>n8pwatgC>AfqWGy@#;e? zFOscLdZCnXN?PmT?)o!K3Z=OBuvFrP+``ShOw`iEiz9zEP6f+K351s6?|$e9@R1Xg zCJ~zulPpig`ofa`WItcY(2oP}h2cg$CzgoDsJ4z^VfGzf{)30k}Z8=!R*#(=S&N~ELM8O`mA0Qu1Y1<>@jNh|c zWjZ8_AE$pVpe`B+l$^xrsN}oP*El86WFM#m;(tSB+p%%IR*|Spxo9t`A}o!JIlR$kl|~q1rQ3*l2Y+Zl((3|#%9QQ+XU4(km9XC4&Vr~j0 z9quY#GN4ODFrtNGHxy+~cPO<=&P6FLId%+QW-5sAJp{};-w+79JAvkaF~0#FF|pS& z5_k0yNg!eKLVt3_yW87aw%^z>x8J8B(|h(Zn_z~l(bN#3HxWF{K2TKZddz=L(^=}9j*Oc~3J?SiQ|~FIl|Ee9!~D$$ z{ZDYmaP7zDc*MS@`_ZXjJFI@884b8zUUM@Hu@(n#}0h3r+zu2 za(qeU8Kt$CB>FN}*rKJ*27plV(g308=-tu6rNb#s$FQL?;>S}c$TIdDH_ovHN9;;^ zUAk}*msP?99|44yb;1Npe=NnhDU@f&QGvd&8_E~qUl`W;&@nx6Ztcol56`y=W`P6k zenZ%+W>kEH*sf@Fv5HimL5tteA7CpcE4TF1laz0Xn7vx-HR?> zz4(2geKF8460P`m6#!$zA?PyVVE3zc1&*V~C3J#T*Z9&yv0pRsS<*XHnvCzWG~pj; zSfPJ#{3`{U&EN*`3&%bBh=kZ`ZE1Rbz$E-ddz3;E^ z5i3~-Y`k7y8#QU0Lwi`MhqedeWo;A;7O;)>-zs*7K_7b+K!~cE#aX{31sYM7gbbLxd6f64cyLFLk6AOjIVJgTWoSpO)#=nE)08m@$PS(xB47n{9#yS2vBSkqG?7Usun`ZA(41F#9@lbjB?*2QUiEDMK&vsPfG(2tKPR!ypwb? 
zX$kI!J5SHw-^NVO$$7zw^vab;_>>9zOTvd-E2GzE#W8iR|W@ck#W@d$>pisAVvjhIe4o9I0baAzIbm095L(Bze;`U(^ zH*xzQDt|aS0A$_m04!_(7EWFkE?#D404p;y&wq%HF1!G76Ax>1fC3{x*3kjz3P&O4 z=;ZBUZE5B9QRhEj0n}zR02Uq|F8Y6k14QhBF4kry4gducH!Gn1M@2IeJAj&_nKjVO z`+rhU^IN&OIq@n%KKCI=Wa2($E7ut$*FD0IEP&po<649PpQ9fTD>#@ZZuH z;V1y=R@Sb6JJcL4+&oQOfPfE!owXUz!Sy4=-N78_0{Ey7P?M1dC^-Qg{wB-+O`r$- z`)~j(j4c06_wVSx5?MR^E7-)$%+cP-#KGIz!4hC$Z3hG>Ny;<2dAZR8OdQPrBAVE_ zI)8q|n|PR5+nJbt5dPJ=2|!Xr1z_@_@Zb7e&0MUV+*}!5t?mBO$n;m5k7Jf_Fc)*Q zw+A}7xx)R`pSZOP(Cp*fy_x=fxwa0Do({hMK^E2y<`#d+Fn4!i(r~bLb_dFc|C{q+ zg8O5$1iAq@n3PQZUTS^hGAXz=%Sa&!V%e8>R$TU!7> z{=xaWns@*KZZ7UXf8T#P{ujZqumH@h&D;Q{Kuc=}xIfuH%s`93@yGqUSbG8Vm_Npk z1;G5*-+#Xud`y_Rql2CIANs$pm`PSlOjSpf?%$IC>l77r^aA)YaBu<`SUH#hEPpJV z>;SHhFaH0VN7=;s-+lZeR>r}?5y10zyC3KDpO!uTJpoapOn|*w+{C^(ze}(-2oAST1{J#$R|LsW9-Olb`e(Jvo|37{cduu!Ie=|NN*WK-7 z3ltnbcEREQQni48udb-0o%#Ru$$z++eC&dVgQeYnkI~vy(%K7Xu59gQX7%^D{B76x zYtihi9e~P?uGW7&EdT}z&;OQ`a4>T;|7*)wKW^B> z#l^%M?&Fd_5C_1QUY`iGTcM`WtZq zm_+{|E&!9*AH)M-lK3y;VgWEo{XuL1CYe8o{bLyappTsLf6zxxg+J&cr_z5BH#2}q z`43_RFsc4Q9~IR8ppOdbf6#{pjsGH^k1V=>&__L!KZqN^WcmkvR5$xC;`s2HIof?( z#D6S*-3OETKj4RE;2-{nu7AHr;Sb@XWs5(AkCrX0J^m5#m)p_ZhAS{{cU`{D%gXk1k#RX#5KSJ%E3TaeUBSKOVFHR{QAN)y~BAAAdqwK9v1A#*asW z$<4|I_>Y78=-$oK@gD&nIo$sNKeT)N1Agf7`~z7(qW>=cKbvFb?&9+C`|+>0?PIO} zga7qo1q6Bl&EQrR9nJVcZR$fi9&1JLJsA!r1?DIYw9;r8d{{%mc84r@zPX@Bv1HHufAIcSGlp2ixQD*7qXFh~eXz@RRC z==bLArx|Pu-U`|+OOfa7&W)s8fd1$?Am!CCSiUy?W${4eu!d6({E!veA^8Ee799r(wZ(`YaIynEcuWZG>x_{fOF;5FV81E1c=2-Om_LCxtpdx?6^E;PDn z{Ssnvcv-$4O@G=GJqV!hc4Y@}zCJ-QRhZ03$wMYvfNYYjDk@>kPN?PkOQ?>zer+uB z)f@bs&-m>ZgAKS%nBO-yd;Lw`0eCNNY~{%pQ{~3v#LM%y;_f)BQN8cA826YV(ap>t zo}u}CX@B5a=d4QlHeECPnd~(Vc+o@Hoc#0#sVf(#EPWO4BALnmOG37w{w0GCe|v^e zG2mxekmg*iGmN|}ozV=8&0OOyoZGvOhulg5aVJpU9VT7=O2s*bUo$~QWbrfJ=@XAh z2ei>973Ha_sUyfoOf|49D6ZbPN=@rZj zN}06CDp%K{kRWMAPGL5-+gZw}Cm-$1LAV_Ud^fzLB%-L+{xi#GH6oJtL9ilL+V5xh zQ1cVU_|e?F+Hq~cH+C8oc+f4OG6i=+dY%hEwrwO$B47j!RqEb^P(o(MNuf9KCkMI zs7@K8QfIu%W80vT{UEXT++=MhYj^nJvP13{^?ODg?OYx@DbZlsw?ezb`1dpWAfr^W zCwf!m{+gfiBdh4C<$9y%t*LE~WNi*Se1EWFWr>9iUyQjBS)3gy9p_mEVV@<~VXW`d zJu2cOo z_68kS*YrP`R!fWZb;vpK^yq>$BqGU%TA~&5mCmRUr|KESUw7ndcFS?}?`e00AAfz= zs7k(3PA7AI3%z1ZU5AS7rhufQPQJ^q4Kl9B5#M@esviMMwxvcJO^AuLO+#_2L2@&G zFgy|Me-6XhU8vKS4h}fgK{t9R`;pN2jtTREZbs3A96*ghvI-~bvE+(`?4LHwZx&mcAS0idGYKoig)(eEb3t^xi zN3udm3kPY#C)%zWf3=*eX9nCJ%jcKKTi8@iIu2!KBQQnS@H+ovb_^+3c7F|Zv`if9g@ zV2Iz$u59RXs)@))O-#Dk9Dlq{{%a{GM0U!!+h7l|KG?d!w07Qzo#j0ESf_1*fey_g zgv-i3BsbM%o8Z6)O*-k&c|mt|}A z4m{&5>CX&dzYg+VFw68%62L$!&j%6v5sut!xZq*f1{F5EcYVHv9@6g!>0UJpA3m#M zD2rMP-tJHTEWSKj^M44}lw8mkv9hgmOlzu!+(yabh9HIlONY3;q=o7pB8*U!!Bg=l z;b8~!Y5yI;zVypJ7#ZNeO+zT2eY$08Fualj{hR-CLWA1!;C}gOle)-Qi8VE<^{ohJ z&Bv4tl6epQ$2|j%x@N6!M-A+wk@RI;lwp`&7ZBY}ba;0cF@H6N@RdDU@%l4hzvnp| zt5s02TL73_!lsfq&5%LG>^-w}UWSt!Lv}9 zIHrHH19!O%0=pEsZ_!UrHDjkvkRw>;D|%>owoCIpf)|QawUsx?o~`B&wUKJBFd6tq zb*m8*^~8IO4S&aO~>DC!-#Km-lvf9qZ=zl7-<&s@AYdEKv?X)PGVVBD? 
zguHh?M1t1v;ZWgC zbwGe&cW6Uaw&H97fpB}R720vOagCm+WLVcGy*$Ody#_&6$1!@NC@`plmb{kO!O?4n zO%8x^-+yG6HyxfZX{0X{qviI4 zf8;Y0U+Y(d@1%#S1XcDMebu>{G}RAf7?~0=`hPAXS}X{?b2AB!NyXFXAl`e65i2yJ z`ECdh@?xNtvv9l)rn0c%&+=^p&arq& z!+$PEHXMcnJmBSus+tY5$VXLNK+Nr%IEQ+Qvz@70be*AO_Yt?nz!ai2h;VH7p_ZsU zerHlGH&K1Qe38;eNQ2kHLP(CMF)L1fZrhs99!#+uPcSKi1Yf_w()6d?Go^kX^-$Mdm z#jc7I0(Mo05{x_O@!xDnL+DmUD&=gyBC2p;M`Vr3gp}L-c$N5aZK_Ulfn0-}M}PYD zOM~Ed&yx+0mxdiba%@KS(?nX~Iai@IIP1soDJPId)4^SR7uc^nNFo*e+q(Qu9(3hn zP&M*m`|Zh-bftuA++eX7DFjd6QH)+*My!c$^*qbL6g&Jo6P3EE`Lo5>|;ZZ z0ewMT(0Mgnj%PSmT%q<)BlL$~NMyW?*Rrm_G%1=5#TNke3@H&JsK5&t&(Z~FuLN9N ze7{sgnGp2i+e8|LD+vhCGGoeb+ zOvKk6zR*6!bch<$#(An&ihm%FvUq{w*wTyNtd4NA4pWvd8Ie>kDA0j&z07^p6iFVJ zlWTquMM=g?JmdJ7C+mek1TcoV|tp@b_fq!##!Y)ASQY1Uc zBig}89j@1vudB<(Meub_YC;q()_y;tp4|LE^k9Brnesp3b14WsWzBM7yN*sfe8>AV zjnXtz?#ar&iL&&$$DGOZLOei^rsfISlD#CK?f7j&iV}p$Nin340y&v*g|Di^E%6W$ zG?f=L8prAQ6yX%(sejMkQy~Krl?lWbws@*CsF0b_v@CVchD3KSMwmaU=n(PK!@H07 zqTM{r?>EwVz}nTlTt52LUP#Y|hP~r#`UT7iYY~VUSgJowQ1;%DPvSN&E6DS2e?G4)lh8-9tk%iZ@L=LoF;v46&bCwm>-TY)3s9-}$M zGr}RW;O}pITj<@f&CTTd*M^f|d4lvhS&W2|FY8G~pc%Ooph;5w*lid#UG$V77g!pU zlWLXa%*dzd_|X+$**-Z%lXsXL&c2E8jI_=|2HNM?R)3Mw+Ki1UL#%CBQhJ|>=)K50c{lg0`k z2Kihj8++deuFUnAkWBW2C{~rO#Y|(Tf>2$32b4O8hPdffLe9{9jiRxK^yXbHicxN` zJb#zckIOmCb3y72cRBMphkRQzVn#8)Yx&*IewNdjRbS>y;C5Y~;Jg6equ`E{uF~s~13s>2}qFkd7tpp#=*3;Mo4yv&ug)0&IsHzYp1s1k!eWro4ptVNr0?aBQ&IHG)i~b!c zqxa3Ayw^qht3<+wy7n_gI}+--mF$#D)t7nkg^nBphA_}y`^vW#G*o9Euk4Xi6}=L& znt_vgJl*!id|=^cE^$aMU?g9GXA@_9jb4Vc7a{A4A!+03CzbIwTw32w$ub*e6MvxQ zpp%xy8b*iPL^Y+T1GlaBvGw+4O0-1yga+^>L4p2W)&-?~RK#DH=xS{ivggTcukIu=XRv)H3(pSmTgIj%%1Xwq5_p{B!0J zUahU&N^L`~b^j|(3npX`QP9$JIbKBace@tT9oH-T+IL^jpS&`@jmDG!HK+H5d4zh< zg0%Q*olVOUa6z83U(*u+ly#R03z87t!X-@4Z3iUcRX*g_WWb5A$bYjOY=)3!Odo_| zyz`gSR0wvEdTtb&fmrq83D`k1kRG&+(lk}I)+xEM3ErQ!?1i@&>uNA5{wsWJ{3&av zW8e8tQvqrkhG3IXv5ro6D2=}@NI8T>qF@USuT7mXPC*R_9=fy^X*HE%i4>~bRu}2j zt}KmUj10c#F@KJ%K7VD98q;2Q^l3Pzlk!V|$+s{{a50dOieK9@abC-cv5YIRhg~rE zuHaDkI~B%*x>mfNsqt=A-n0TW@FZ%|DpN?vM7Cd39eL;7+wgd;>UXnc zp?$!1hH0K<{=Q35j0^3K+!Ksq3zcDaFX0Ve?AU}7LmOCRE)|oC|)pl*IjKI?Fy$0gGpSk=x zTeQ_7y+tBCWuxO5%ivBK@8?77_u0>7s0Z$Z z^@#Sp8$iXWAkf@&Fku!xfT-xp8Rmt*E_MDqHoh`CTYnFzphd(!jB)v$z2}QJ_;O|a zcDF&IzdSRHv*V8J()lnT{ch!Q8<%~5LsLvtG`LLu=*4S-#nq^+5|>(^b!<;}ze|^K zZXHfa=-0+Be()&}FFU%?y{U=&1QD5S+bQLh8&%wyK5?jv@^J%Mk4*vLZpRgBz-^HU zeX6{-MSmTE@8}}or3qP#4>QR?T)fVmc>@f(5MiN6C+K~wSUI0oPE5l&!8Q*h zDP)gq3Pb3~wzU=>kwyXb`us(TK$WO=SW5#9WSy?uQaWbSlvsP<>XW%?d7 zo$zGCg8GJ{`moR)c68C&MX*V`Y=xJ1Kr1)yh=04&_X0n}HX)A7THN2{dNi(Yist;; zWHB|<+b0?o3iH(5(}+arrV`yMKc&$QpitZx`z$z*@Xi+V`!z7eH`&T@1xyMJ+X4en zaK!cu-fs6xgNRvYu}K|ZK*dV;jKcRMWw~eG`rpP!S$*%izv-Ewpt6KSV-CYwfNGCM zw10vmqZRDrRew2t{QM~oZ-lhgpY8Xce;ku%VD$T{56UlQ zSJCQ|grKk>GECS9RC1dDgR^jl)tz3<^qZ*&BP{pO3Kps!!ihUArnyMkvhahjC?d-E zCN0+|6kVUFza+ZY)@{#a_g|>of>PzO#JlVTu@jUJr^(%kR~lA4cOI2XmL!4fdVdT1 zNZ=EGam-K6(>OM-eu3f95p@x2QPTk9UTSlt2EF1%UvQ>$e>xEp6w@xzCL2CC+NgZ)WT3#ogUXoylR>Wz^S27 zFEzT5KBVT?`mTV*LhEiFSd@T^Lw^coF)~?i3>TyXFZl&;K_hDb>@pIYkJQDe-J8jD z&H7LOzHT7+^ssPYC0G3N0jwysZ=V@f>imz+8!8(mH;s`K$$;4Ew1MySK8cD_A@7>W zzD;P_*jzi0h7$+Hw74T@@$e_SrD)qpP#W+pWe^R4ep?EZV;d)-(YI<{HGfMgCqrdF z@Skk9BE0*CUE*uR&zP)>vXL`%&C2D8$$fy5DzG#cm@)vC9@Np%&v zMByogZ%J1(w9V3`FauQ{&|#cDO7PN~l6=nx?uD=US9@XbXTK-Lhlmx?w`<%7-W z7(Q$5t2_TYN?BPoWF`L>N>U>5_qrg&1xl#=HdidM6p9MLPr@$wm_Y+1#`d$&(Pf?~ zG}){9pnBhB|19|*m;P@N+2WeM!1qJvTaASNf~dQl2uGn}l7H_>__A(#T63o(on-6C z>kX*0@YA1)9*X3Q87Nd?F-+;82z8~DkoR=-4FZ@BN|Q#JUJ2qvmd7jX;i-*U!|wy= zkJ@GEqEpkT%q|OMvGJsCBX=VBX{Iw=ca%!{)SD@38~Y@&`Jy+7ibV{q_6T3Uyl;T! 
zBC+0ZCur2lpnoW@P7Faq_>H4js|J37CqIl&n=0X%=jfzii%8(qv+(xcIR#@nHZ@N^ zA9yEsmTyyUR70@JFmq6TU*qpjr^1`ZX#qJOYDq(mrz6|Ik%bbQa!oyBnklrjVKmAM z9fy&tTg*(E4rDM1Sni5D#xxu56h5t_XCz<5!R`2rnSY$87}*r*%gJ5hJ`v0GB{&=|976B%1SW@nln(RU9P-Fpx* zZCd#6;eXIgzm_DhgGzi@vc|S}GMu@#OlUsmp1C(*=XRHr^{edSd61=LBZ*J#kK}Rh z#$9CRUTYoXP1(n}r1`7tyG(vRul@#EV>F`$#IM6OV50DhPS5LU7`w0)4PS!N5^TpX z`EFC|={F!^OKhI6RPO)UTCE*1WLcjo)0pE{Y=5q~R7o7q7T=X$_Yhy|`vX;9hB)h~ zSL^0(Tc8gYJPDtZ*FW5V(S9H77y~4MeYem7ekz2?zrmYBWGI@y)-asiWG|#43p*}0 z{wMlTyU`k>ho~^r39HepkYU|w37FILTJHd z^ncinrV>MU@fNsh1CGGmSe?? zu2orLKH+nkG{*%6vRi_$dXApQlVc<#-a=N^d8hk!zYtH1PLVySh*AZN>91k7?xY=> z_)o3_`iiPo-1k4x___4QChMhqnx(LVKYvSeGJ$pe6v;eRldo>M5XM6JX?kqh826n^ z69-6>5lIy^{YlL6C(RB+Zed{?FTFGx-=Wv1EHWu#y{!qZwk}$w>-)Ufjp<_!t7ITx zA^7C>;p>oFXZ(=s&*w0*hsPZ0ppp8vUgx@MH1GRQ%L1hRSA+7G6fFEp-8I;pyMHQ* zjvQC^bHfXlFCg24*Mgi+%W_eM3lRpg+U&BwC&g}g%HYHAXeHMivJMz&7G_RW3NSra z&YYR)Ex$|~h@|d|6J11$#1npUorcIA5^owWdvb2vUf$u=Cm!Q{UAL-85U5SyL6Xh0 z$(bAzz0L5+BZy*ldw`7ojbJ%HVt@2Zm}b2JSRO`q4Nb-%FbGx|6d>h3u=0#ce3L9T zXN$zWzy>7+7Ip!B$Vx)n9lMC`GO@XC%1Jmyj5fW@WM#XNTk(8M%L{(w8u4!gICK*A z$7J>TQ=s?)uc-oQ;$13vWrg~)DYu%6%x-ViB{$?Bv0YSmqE310=1lJ)y?^!zkhzoC zA-dD4^tiC-L_?_64D58d6av6X_czVNAZkN%(3giO?0!rFHPJNv3w6_up1tImz0-nq z=HKD8_hQ~`*#ZqVS~=9TA)IpywOllO92@2Y6k9a-+Fa<197rB;zcn^(wn*WoyR@%}`8z1y*^LX{fc*S?W96muJ&yAS*sXZi+w(K044io*(YU_ak*0QbuO zHB(UZzQ3(&Nn?3xsGt>^NuQ5;?z0*9*6U7KdtHYi1xJhory+)TJ%3O@S<_kY<8BR_ z7ef^hG5iOhLhuCX+Y}yS`%oWTHW<@gs`%~o;@gP6cN4xWbYRsSck;%~Lq^HORpzGX zV?CQ=xG-IvSID}1_9zC=4-k9{43N)Na@1)Fd|oT$p+U~gYWuB$+kC>qPO9rTbY&JB3h?bll_ zR<`KyI2(If+S*jQ%R8Sa*(NnR8C4D|*hovLz7;YUvOmMZxVurh5!{WbmW*VZ%7I8QeY;E=pNpKmbFB_h1`82N)zhN(}c#xuq>6LB9 z%xFe}ei*lLuyG8Vut5{M5Qg02u&QU3K+0D%5U#Xx^ER?^q?zg`2?#as4^7U9Y@nYq zY8~n-IY?q>6v%l1~-|Y!73^)+3oaCJvg}Ut3DX_#LCccOQV*&Otwqh|{=n4nru3(+ILR_eJb+ zY^Oz25`VRaUDBaAj_9K>cDp%+nGXYb!^%%2E(xL?LY*X>3lOxm#|0Du>F)jSnytFH zNU5$uz4ew&P$A(+-^HA^lL{_X*l9tqF5zY*bc1BVHtF#i(q~Q*faLXXg@`0sT}|13bQMm-_gok`j)t5=+^p~j zSuNPie8wCLamzHaZE*AZvRZ_J9jXu%u7AU>=#&uva&yUACZ-BP#piBQ_n1>>OpdI7-}=xkML!1m;A@KnSWs^ z$tJidToUqnCI6fCLM1uvBJLZKgW--GhXCx0F3etvAS28SV-9BkKA5-F1wV*~caz*D zVb!yfn*RKBW<$Ys{q)Wq{$jbdD)sZ%`Rl_6>;_zdYxPKzlCjg#H=qjxzcPTWxZ^uJ z21;wt#W8`cgx_9TQH++!3Y>HJMSlisiRm=5O?NYe<-@oNh9gTnIvHx6vn_S@%)R+S zmyWKyg1WifBegi6ls!{P7PE4`CPxtWbNC1!m{2%qL}s|M%K4Bxahpd?#0x8Iai{{B z(P+UhM7gl%xjVxzo?)7@faYf?RPd3bH@3lU4@)A0MZXp*Yrudt^$fg^ zR%AtPpAk5PX|+@C)wkq0lx>jVlDxa*jGI0=*%ON<5x7jZYkwoh(W0;C@$P?eNx=Fdy%NASv#|A1ZKS8MtFw{Hoi>WOXBU2@oYac;OQu;|$=u^}ja+ zsjjpXY=KZ#+J6yo@MC^3DojyuC!~FLYF+9hXFuyA*N<822{J0!J50(DI9N0YPG8m{ z?WHvAp?8$gwD{bJ;_ge^-U3FpNi^DH1JM;t@|-&_mA|6`>;0wSe!2~P_zatTZOTD( zx*`so6VV`+f)}oV8(LOMV8Uo0ckw=R3lbqS+X_VIbAS5riu-E0#k%$nJl-jXHwTTS ziZIL(L;r*fik1vchj7+jEHrKsynCG2nIQMXt>#%IEd^r_Uz(l;3`xUMA@&&Hwegl| zm`#fDr4j->Mero0f!VUF#x4c!6V$6@MeU235_A!`nsR0}Xo7+@(&m~XfwbKaQ>-s; zvfc@)*MBWOqn40K0*9IC=_*c;PG?K6t@NScAL5UY_T7F9*RH*bew^vEKsh%n>Q*C7w^ z=45Xxl*C*qE11X@=gJiAvFd1Q6Xoe3@k}z~9)IDqv*=?g6#3KNAHS-f&^OiNlG@=! 
z<{dpbdM|K@u7B-@5ALN(FxfrhJE|xdq=5ndaqG2eT96ld2K(zuI zsQiF;N7zKzk48|&kAi+x>F5fR)QQE@JsgTE_m!mg1g7=Z>>5shQ>OK z$d}oi`K-X|+|E$qQljurMXk~oJeKe&3x9Or%)K@Vek%3_KYXYpVFiP?A>Wfq?|v;7 z_1dS)eoH|qRaSNChWF99E;_qmAFQgHBrnthWbPn3kYl zwjZ8u^g#Eca&myCdNK8m4-qsu1g!<3TOB%l;k(N?MQ+i`*98WYQ|%9}IxSKP%g#UdI2JeS>kuXjqOM$(LzdpomcM0pK1ZQye>{dUE z7GZ%JxpUy{g`BSF#bMZo%ALoFeU*V@l3$#J?|)Fo(sWTTHy2Yy1jzgfoPT-HoN8>) zhy0;Cr>u z@ViRxI5fXBA(C}93A!>%r_gHV%lxw(t@5N0W{qofkGa&DT!z0T40(>viK{rR z(T`-EV*M6+u^t@A-7)HvMSt6=x9(t5^Va-;Uuv&V2|co5Q?kI~5K3AlMr_>wbB_Gk zE}nJE!d{lhwichl^)8q~Xt6-Z!ZpJ-a#Nj&(U)SxRoXoXZz%XP&8fpg8l{;i!=hp( z)r>0z1*I(~%@+I50e)3xYYPrCm*N%_*Mu@Rr*C7Dd&yKnpv^kfpMRv!e5QJ%M)sw? zV4MJW94NGvj3m{fiu)08vY)5u{6GIl;)VG(l+6i-iDo1CRPkf)ll6O`daZ3S7_}0A zxp7xK2QYp_;F6dHof{Q+DuiUf0pTs%CP$|2SJ2s>=ygq)TQl}bU(|f~&FG6U22rn} zK`yM}C^^fkkjrKusedP|eVfkGcTaHUgE@jcr%Oxc-0`{{so9^McxH%2q%sXz?7_b; zODMv!#*Byg9-PF!d%7y$wzBKi+DdzQ#6tUc_{@w%-*_4!NmExu!rxYRt8ZbLRLRQZ zs$8&hT|Fx{>$~O~ZBkUG##fBMx%KR+Irt?AI)E2`;r*FzQh!noK-dIpKHk;T8$-=u zDW`5@`C1MGvp|xeZ*kWlaEPr8&K*y2hFWdaK&Jg1-kVPF-N~j0rs7K~`+>i~{$~Uk zovbTdAn#)ezJ+=Zw$SA9mj{INlmwjosS^J+qxiWpz0aK4eF$bl%am(F!;Te!ez4Y9 z-S#k+%G+*Z+JD?XUSmaoq50wjPS8U62bPeD5+Y-ApN6NaW z69zBea%X8T-$NtOFt2E`1@R3C>&zB5x@hWxQPA$VDSMF9Co084DFtoq5Zd1NdDe`_ z<7)YlvR=@O95RG8`v9;>1dK4GRCBtM8xpiU;cU?e?gl4Y>@>j)R(f`h_ek`ynDWrw zwcbdJ(|=Kk_7~Io+C(MJhC3y2h{wfpT1Mg^34i*Eww-oa71YzH{A;1hu+WMw4OvnP zOL8~loRO7$w49^utHKqGK~7?vWPL$bZhWv;DW47ISDbEYRYhbGusNi+Sag%cd!1H0 zp_;YQ?PP&Fr;(Fckn4LC0roXw4gT9}ku*`7PI64x6JIy~ zNpB}aAR8RI_11)<9}L@0Eu?3{TuG@+CB$wj<3Z2q0nxN)PTkJy7upf~5UL_)*eRQ7 zk$bktNdX zcQ|wP?!vM%Tfh#RPuYn-Gh|mPI-sZ`%w}Y-2lSb^H=~moS-NbcOwk#^aN%^DLY36v)kkF#S>iHdG48+9GJ=3UT|ZS0*vTD2}*0tT*lA9Nc3kc(SoP7><9$-2U>&T z(r@X$EJhT;z+pNW>%*G`R|gNe9gJzki)A zU8qHA|5%UDi|@SSh8+zml{!X`(#}(ujoR`7H@aY2p5JkMMuYe7j@X~#m117$z+cqZV#C~Jjwr$(CZ5tCO`|ZC6TU)heB=id4tT^cQ)Dnu+*R7r{Ep=Y}IpW0rw zw_=oHRYFHXF6-oY$Zp)hcmv5y$U{81iwZ|tp&?^rN$K+ZIwv)zk6T0Al8y{ivtGK0 zKht5V&eX4Xcv})K_dt0y(&f0bu!3}@QI+Tp%CQd4c_jmK4C3)B&mj8DPLFS+`k#tr zs4g|aBVjCh8ElCVb~ksh{vW~V*-)ZB1RD%~III=19KnzL*- zV|hD@Min1U@WRT9qO#tpe=vTDwWs8W`V3_`q>WAt4p!=+2z#M}OCM41BIvkBqVtnU z`Ea!?!Ng@xi+3gt2AB=^G;rZaA`f0JrZaDmq-jIN_5A!yV}I!0RpG~-8nxd+A`5Ux z5lnUf%X62U9`z2YV=g>$8xv(W7$o^LYxp4B%v}&OmF>{X`<@v~2KplS7de1Il9K;ZLzc1Lm>>=d@^8&? zxJIhD-%PJV0^5wbb8ukb^75#PKFE?3ZrahIwK>CLKd=e$d%OIh#{R}nV2u2qcx_F5 zYWPymNc!mU51>yuO8DCfm&&jURrNQ7!jEM1!K9Td1JuDd9^14S{Cdx6o`W%5f%Oc4 z3H3A8e>wXP@+Jfk?WPfgMebW12qdazDS zO{T?Az=lLgZ_S2nyD+mU3P{4wtZ2(oEMr`08nvMPs#O`ap!$M%E@3y2Z^HtU#dOU8 zjJ|49p$SuzlAX3d`nIJ(=uh0t&?Ta>awywilLU{Cd+BIg+AsI3OI_O`6+ zdRj97Tp+yu#3S*^48GV@ZauP(D3O(-=?Zl2yuiSK$B^8Jn_j(c14Oq^R_Zs?mf?T$ zgx5oC1T((%!kG4r602VtLvyf!xH0%);x`BC3$mr{zwnS=i*@il@^LKYY|%hn ze*^`Hz%iRhm#6e&)MYcj=&)ECl$T;$-B1_*egK$}hWT)7AGS752AS!~O8CjrE4aS{ z%ezX$zee?t+!LE~Rvq>mp#xGO#x}0~cXlr(7&+h1kA|EUf9m zS?(elL;oNaHUw+eyFShA8i-0|Ay%&#lfF=+fsR?iUqnek<4i>^p@A8i%;)E+vM!@0 z>jQR+Cq7pFIRyUjte{f~RlXs5!EQ9x;KShTtd*~Q2pOfOGqzv5Tw3)Ld-U|~>c9>! 
zq#8YlVhD#*APxM0QPaXmc;6&n@`@y0ADn}!h%Mo?-YO#~7wO(jlYTlc4*56L_eX?AzH~xWiCfegTJxMz6SiK`d$7%X4aFxbh3J1geHd0xxCS=c zz4;w@$~F}CO`LASB-%cqtxwoSB?j69LkJuhY*CQV&L+Vl$$IAy2gh=UN-Rq)LGlkU zG?p>`E0NxxvH`sCGo5;d(=WBFiTMkGBBb2k)Nb9!9LS7GQ5;@>y*KD4TT?nys$1_+ zMAG_{MtfU=MW_|kb{87kP75|~=z-F>z5W_QxDm}|gMDf)!yo4G@Q*Z8;OrH#LTAoh zkx~7ASsF^l%N@y1A{nNr?U*N4AG2}o@3s>81IOEQ+d>uRcy@NW)iKEOW8>T}T?*?& z8X~ep_Y_O=p`2~%GeH7&RQ`XNIyu%gXs9vFThm*~4KPE_+e6Bi%`fk#XMy*gIsOv!DS%^DJzjWA0C?Mxwg|~AcU+5e(ZfO;Z?1Bz{kNmgqG@taI=Ps5_w|AS55qZ_nNGj7%08;~UBJ>j4$b8s4u z1q}{;2eOpAZjcempdN~+*+J67aD*w1`K_WW_#ztsW*vAc+d*(sBE%!5OyB?(spc$x zhqdQk0v;7BNIW6!NM1>?jc)!g$TJ=X@gl1E=u}o=Ri3+WBbfz>B|rsFiLW>SeE?tK zR9#l-^Tct}-Gj#e3b3T2*&(f3EvBBqPUu7rki}h;vL($i)cgyHHR9F7Ctn)`C+ehT zxKS)pjmvpM^KPXuQ2VbzC)6gZO1g`)0YmG)3M*t=&X1`E3XjU<#^RbHW$~R?eN6B5 zRp4ndqXcVh0&i7JxpG-qOMY+kI4abQa$h%>Cz`0?&Li}`1^}3iabAN|z8+U^W*pS$ zF-2yTx6$fsbS!oCJ5urU8n(E84PR?@Nn*Y=DH@G=b1}s@3l?w7UdpzyTTV$+Sv6S@ z2avC-{Vi32hgW&!3-U-p%xU%4Sd~v$gKqE+(*aux*`4kGZx9uJ6Im)eo5eiYj8NI@ zf$N~m*YJTr42b-%oMjD3?!%q0lPh)Cuk5^_bdUexH3>%UWM8QZmAmvD3HjzDpPS-f z>y(cuoaN4GAcQh7yipF`>@Iy);;~ut2iL)DCvQoz86)dR4=>wSm_O&K-I)iv%o+S- zXAPwy(a;s233-}6rvRKuXJ31d<=*P2=|iXJncfA~29QYjQ2BEJgJf!k3vvo1dovu4 zzpUct;IC>|`?5hThpK#|WgMo@mt3~wI4ZwD3O#}1`8I|Xb~S3yT0~K{P!avgRDA?o zQ;}uSPERDiXYlr7R%V2LcyD9-BU%gme+;{37?dR!4X#a~VeE-}=V`FDuT}CE(F(j||z~j`%#v`eo%4~{X6Ib2} zP72)4>U==3;y|SQnXv9Y|2(2AXJGIQi!?$m_Zi;ld~ESkoz#-~lv-#Ty&k-=pXfnB1H|EuPT&XJIr8EF)5s+Yfih1bOhf8v^*Nu`WX5_JPviqS zOjlVA8%Er53)QwNU<}8qy3HAuZVZpnKlU3}j$K%cpoV8DgOiDqy3+h&2ZD$iI_oPR zP-CfbX_)4i_;Go|M?Y5s!rEsh8NhUl4|jpk^>B|swb;^#q}*!cu(nhQtN*^-otH## zjWnZ|TT{U2Bm**(3$vRTHK8F4#;bq|3>B;9Ys#_9_`Uc}nVX)f!xjZT-+ZY5SkU%x zUjvJ(2v)9v9fXQQe_C(a3oCaKq}g5tB4=mAw@5)T5GF@NZ_P_EDB8APxy*K}EaRj4 z3e4RRAIrb@Ox-@X|91TJ*`nrim+KmhrJu&myjs32i?rF+dMa4-=7&YzW$m1()uTq2 z7(pd1LhPTPJFi`jl!1~(E zPnbc7Muaji6Pxf>QS2peQZRj`VbEY#X*X^fR3Gf=Z1&t&=@Mt6!J2I0Xf!<$ab3%?geOZ@qNk5Vlv`>I7E^&tr2BC@2>x)yu5`V&gW8qL z>y12|_#gQ#L1@9cz$Km#c1$cYc1(UL@xMwDWsaGZvx%Oz^)pU|`P%axU=Lx_vSPet zSM#l@^|Pa}?YGH?e)?Neqwhz=VeCL6w7S$)Cuh7sfIZ5^MKi01n*?!E{u=AV1?0U7 zf)h0La`}*RQ82pX#7N~|by917EKP&#FfEpJ8bALnQQD=?` z>d-N>!ZSP!B>8--Rf6a(^<64XaA}=e8J#NTWdW0UYPV5dXb!I~kL=nVcX~C2eeXV) z_Gnf7DC=+hhbpk5a)gmAb7!jLB4iJpfL%RkmFf0odZZ)`@!(k;Aj&(>ETv{SW!9g^ zW7j(Pmbg(4Q;k<)Q~HscFZbjRadvy(I^9*io?i*K@^UsJi;SE7-9uXXn8AR{cODYI z{JTD(&`#ReiMv|o*0iIf8jFFyZ$DHDWb@{63 z$A314GllL&7qc)5Q+nTLw_v<_EduoDB`fgpJ98O9jzRp()-yK~sweqPKSA+olpX|0 z;=YHu;)gJB5yKt20cX?f2*)DRUS4D4U?&6d$BZGfFL&Hb%WT6%;=-c$uMuIA)t%TX z+1LO{inZEyAvh2(mYL5)BDA9)@^Ilc+yO6qFTa5^Hgbe%t(v$`VhL^C)_n8-^a(md zAiZ`o-=q`h*Juj=a1Jmj1dSQ|@9(4W4aVcN{e6VP7>#w?2+)xK4;P2E#{!JI-|4ndPLGY1U~P=$ic(+P7-&XOU{XGtLrfCx{b2uB1u!Arn|I=PbK&l5=m zprI+nlMvkz9%MdzXL+u)yH08CczNEK-MHSk3e1f3=T?o7T1D0RQxoq>5TGz9;1Sj2 z<&l7afI+(X3HtD2VtW6aM}1p~8Zm-#b`Buk-T#Isx`BfvRpf^R7}0_j1bactucJX? 
zK|p@dBB7x|gMoes=C1bm!l?iC1zQC-MV#G*EG5}h;NZn{4{T_mnOsH+<384}0dawW z2Khx!-v7yiPjms{9Q3=_-WTep1+=qJ77Qp4*dq$cPmHtA7_`fH7m30KPQsj$5(87Y3l}65>+!|`XaIir_W=|E#3+FY zEI+Ri12({{4Wbo%ljFG==HbT>K?bS-(7tF}7#03ia0EvHLOTznhX)!w@sBfz!J;oP z;uz4{pBZP{Y0EgGPkR4J9nfwf+S>!2Zs1XM^+5fxO?2V@Zv0o{+xjwuD1-+TJQ|)_B>BeloY;dsK#8(41PzC zjS+W(bn_B{Aorl4fPjVhf{>JA^#E#fPy=5spdWyIIou``C>c-~Nh1u|XU8qm$rhhmdezcLSS= z27sP|mCa8yqvsH2-~+0s@E9gQxCzy^R(}Um1ry~(pDD4M2&wcY1UEbd5HjFMT#fBn zd6@{#KYX0w$Jj#&d}7$TgNFt0t*$n(<6S&gOh)O&3PdYqTfE?C0QKs$hd@gL6P&t) zuKnG*|Ea|SLkY1P@Ol5q76j_`uV)It1Hk{fI*UHtW#E?*e3^VNb7P!C2JKGG5D}sE z{bTE^I#I1>&E~~5j$ef?I~o4qHFiI49pxXAj97x`nv8^OLMwXJi;Zvd4k^x@c+Hq; zpyhlVP%yc(vxfRY&IE@4O`93;H+pjSxx~m8RB!F#Q+#|(4_T}uNqv4Ss z>Z1<9q(L~z4VQGv>GSl}Q>0%Sy7GM(VOdgMI$Qi{mE5O1y>m21rf$m{a44{oCDuI| zk;Eo;J-V$k_Z6i!5c2tn#@vripHYy8o-}^p%(!ge!`w}%Hqn!a0SFB>vs+UxObcNL0fx88I$TRn=5zZ%=$LwiH@o zRnEOyQd05y0Cw2xXZQ2?u60b1`!eysIAj`OafWS>Ap^six9}bp?1)*bi{r2c?@O{Z z(gdCBXbHE(&EL6yoJDH2$Wwzds@HBR?$Hs!7`>$UA7xh_yNQATNvEK3B@A`8GnNk% z5FQD7De;saipv<6cDiat>HqS5*3|RHrl5VZGXlP`;~k&BTxB!Zr!o)J{&nVV8ZvFl zPSPOJkP?cuR8UYDC4rONRSG!SaI_E@H^_ixBn;WU z$wi%vU}$jEe$Og!)5>-f%d15e?WIOyU+GIm-)j2OP(G62K1Zg^Gl4k2+SQeXt(H2J zuc2ZoUGa38f!uRJ!JU=%6yv}bE4{HeCT@pjTlS|r6>Vfa9o%-r{0F4`EJ1{#?R}4) zpuTDBXREy$w5p^SI+NftMtuSPF>~#rPVtwf7Y4)Y?2s#9`}rx_``6HVmtWt3la2{n z&5QAjrYUEoBfo1~n8z^^{syt$?%JW~xkn3y!4Cf4jJp(dl#6KMo%O7wkb`D`o#8O% zhr9x9n)1WQSwEOn($ zfgk$8<4YKDSCtwK9o#fAl^pu9VASUq$suMM$)y&SJBko`tXeqEMhHLC!qGTkE>Hwx znX#Z5GJ~U~&72)g{xzt=z&ldOf{lOq=afk0#D+paCrImXvAU^sVRQ^vqE^&~7URnHODGvCO1LZW>mAnIjyu$580uz@FVKcX~h5))1NyW4U@*6n0sJ| zKRVJY@J|-556!N#9vP2TZ+A?0HD#eH?OuMS)OU5Xt&Atyb z4*O$~vJ_9KbCgm2xD*zoaY{Y2>8R#Nj1Z04eOWrPxJB+18N#8fI&H0?PBT`0nh!r< zyK+#gp}yPEo_X#Wmf*r=Og@R;H6{-$C=qbVweAq0FaE{4JRwBdG1rFn$W=L@Us!8m zZ&F^jo@O9S?%BkO;O-e?McvHEp8p8AkHGd}IsX1|-NYy}j_i{uvk!i2zIh#Zqo=;Q zpUhm!hag$0K6xCNT8ujEPb3a@XO{KBQI``O({0D6^Tlxrs6tV@#&Q;{lA_41N~(Mk zlLPsk{OU6@I)0Qa9ol<=9^3-@+L@6^dsdIh5UeV!9Ecat1f?mTk-koA$}S9KT~7vC zOoL~$)al+k!M4X5J^JAD3e6nn+B;*uOO2`GPD)bzVskyOgt*GniTziQ=RZQK>|ftI zBZ3M>EkBPS!B{d=^i05@sqAJzU+X%rCEFGZxu~<8HWDimp2hJyfb}0|f10h@{&-l& zN=dY9t5nt|`;aty0F!SGA_67AxbY@z?w)~8l0!G1-qJfJJR7vz#1P`S<)73yL4I>^ z*Xg?vuW?}Mo2+PomcxpvnZkAq(okH;nb135O%-+CB7zgm}H9WZ`@3N!ev{e5od7c_08r#Su7~_s}$Smq&uTQl7BmFVgtFA2?RqEVm`K+AV5SyVVniPy z!VQyTx1#M<}V>#;h2h);}Qyu zkRPsdGh*3Km=C|WX59IENT;r zD!1*L&A;`^cp;*@2G*$Ck5(tk7dAq9}O03ti zmx*Jt9+P9Nsu&$ID8Bu7l$(Ze6wyob<%J7badO*tkns75KX3-dmBCHey?CyBg%llI zT*JHs)&}CvfJoUF%1=F&s#A9(jP84f4(r)W36E;cAJ*)d5+IiT9lLD2_1&n`C$c8S zE}<=?&e7LK-G*uQcy5{tQDTj+nN{oy$P4!(yHFKY?{4AVj`%9v4HDm-o*QOTD|*Wh zn_XbE}=p%WaS}OA`i~P6NW;R*1bZ5*iB20rvH#p^SkzL0B6WVo_Fv> z|G67EV?c_VoyH%gR$|Rakcl!9?VwMeip`>VX4?wjGgfsyImyYg4DPU zjI)YmrT)OIfW;o?$Et>Kyb?29L%z6vBk6Ej$z0#+pbS!`K!=`h_DyFSJ zkAn#Cri+=AvtWUOBZIRCHk@2UWYvY%EubE!$a*Y$B}DhrS?y{~Am}Kn zZj&GAX0>K>d>M=tMtB!q^yFkeV5SJ#qx+MO_QrV4;Cx$Z9z9Y{c_%$t7m~mXsQNv& z8kQUzEAL}LxM*$%?WBQteNu&t5WBP2vBOKZm-S5j-0lh{byZN9eLfZM)c&K&$KFso zuPyeKK4+f-FC!kIB3Cx*KGLG3>v7*{IA1vPSLSY=I~(-BbOO}6Q^AW_^x}PPO`hm; ze93dT1?+Ija@hr)<-njnd@;!l04^p@6Gm3nC>wjPX*Iw$&DNBwf#A5|gpkHu-A^<- zxDg%TZ7#ikS|;M4Xz92#xP&p6%+azxJ~uSpxHr>{3|{_)3Q-{NCTCB-(&U!hYET*N z8jEZ=%6Z(DUY8Nu!J4>ssQlmak|E9(Pm>})vXa(y_SS$I-j>g~ieB3V==fUhtbn(8 z)})Y|Zp0p;+QprP-9c7GBEQ7;dExkl9kC1!WZz(R>+k-CJ`-uJtTT@~%{lREF~TCa zK~BjI(TH3-(b-RV~H3@-}3h{Gd_E? 
zK0m(hOm(P<0R6s$oMFcSC?SMbtv+p4wt zkA4{T3kz+^{`pA4j0pV?*CRRJ{rPZy4Gyl2p0d0f^;y-b(M7+0*nT8jpr|C7aYmP_ zb}`zm(p{%j`35RroRg?Q<|sm*^-?n65XSK3IuDY3hvXJ6^dhAS=${}!Zb%J{r8hlH!mXG;SNqgr^kvZhjf4I$H#3zKXZU3i{v)N-mA#qI{x`+ z*+adkpRJ7k@o%(%~;qPJfw#Jm^nj#LAe`to@tli>kqu5en>EceB;aCoE78PhuIe>U#S$p+=v`iklxcdvZg zq}sN3-n*_d(07V;bkfLGiDCM&wMzB!sqLEP?T2)$!hKy0fL<+IZfwx|*kAK#NvWeV zqWiG&ofo;388Kq6?T`G| zLBcsyv$6p%*!Y_)mOgG$idt`-dW#(NSr5t*@`X%3vZkOGQuE|&l%bxQP4BIoIu}YU z)tA4DnR=QKGz1@if21b+-fLv$e$w?a+r3BgwdeJw0GCkcC8Xr8#m5rP6GdWlk3wr` zWy+0 zeO4lB0)}R+6^pc)v_eINZJV9A+Fy#I1HrH`84V~fH6nT|A$GEH&*pttUW!T6wBXub z(V}*CST_#sEJ~J?lgFd*`)u20xH#7w3HR)UTWj@aunH^XM{EYw*5k|H zAz&0w0*1lb_zV>txlc`00U7%H<3R87irh~;8ST#8Wj%wtcO-rpzw7wGlYWv@ zC6G?mMMeJINLg7_k8hw$gw!tF1YjC_NJ^|@G`9r_H>)UYSCc>05fUpD(LuNUZgZed;H?Aw#2 zANp_Z9}{=s+IWTh(4KeZ`{MtOT5A*MKbpiDM(HO`P5+9a(X-Hp=Z6+|pRqxCrLt$J zUOAhx;uhq7*&4s`%3ig;khj5%wDsLqEcmpy!@P$b(U)z=PRrufClALEoatuioXk}k$|U=N|8AZ*7R}J&&;#T&v(MkjideisS;nC ziz?UCHU06>slqg$dNMv#O&LYR9^TmWJU2Sg0Ms{#?-NE1V*V$CYM|YRT5Z@2iDt_r z{Ci+i)Y-{lL?t}6W)C@p-bXsSAupH`C+}VC9wFzSnQ6-FT@c)%(HO6Djg%c_y9S`< zpk5DiReQ?y>s{8i?j$feLJdr`+jlD=7)vsVq3iR&~ZVb(UX&oT57h*Jt6`)iJJ3 zg}uQ1BV>LmUh-m+!JzH@By)R+$@DQjXl41&1c&yH?M%W*yzO7`kSdxBp8Dg)-*Qh` z*)Uz*gL!-k*o=ir*BBTnUJz0w!zX$Rzofd1SIt*Jq&*Ay1HekjF=p~QImBOv@L%dr zBXI+3ENHs%0>mzHkG=fx*<@Jhf-vIa+1V)cOF)Q7Av?Be=vFV7i z4-HGGKyPc+Ks79{(CbXLY^we%2-eAD9U7#C__fA)KF}g9dD?eOTOU;L50V_%)1WPy1j>7w6&RK)N)k+K*+~MRno+uxWUtgh0Eywx(D$> z=-(`qCwH$9!Iu#tU6cx96Elk?GqUR=!$`=uHp*?t-pf~Tf7mEq<(3{&d>qOz zt0VI*aCV2CFn56t!NkhR`af|dDk~$?{}X32|8FY@1_TH5f3fI)%35jwnlCSuP3XA%I};NT$mjmY!ay63#NT_6&fTBlz61z}j?aN>pgxEQv;OV9Uoyku zgK&)Yfd$_$Pm2G3GL-Tl_&O8qHZ%Uh$=LI23}^$d4FP{tWdm8SW;`UU9VP@p-o4oZ z>Gj$LQrPCHfr6XUSlk+RGVy{?&R=7pgad*FwXX{D zlz5;`ePBMWNRX6c=U}<3px@CU&CVemMzdb#ock-GA3fs+cK=}sDPkP{`Vljk=j(q` z^==t9uUb9teK%;D{ThcqdtRvxA-%c;5DicE#_d7_JKTF!etZel(&vA_;2Hb`f%rvD zLP-td+XiZ|-5mYjiMTJV{eTa2U(e<$?8m2$Vl{~8B?=?Z%7^eSei#z$(GT*|6*;wM z=Nsu`fdni7)Wu#Kp%>~HL>%q2gnJpW;oI0a_n z75U@Y=k$l9uO=>3IIPUHU4H3ld4_{vp1IC{aD1F`k-G4(Q62?~x;w<2x z8uyP91oOK?_mc0k!epigVfUp1Nv>BQ!XmG>F$k3NYrw$~1>#SHx9@jP+ZWH#x7eM& z>X$z4_f2pjFG}c+Vf4=3Hz|U%AF0Nz9*Bu4pRl+69{>xng`u{@B zA;CJSSW#|+wMlr)eE=Nx^KM{H%HA$D3klEo;w6Ly_z3Q44E7~8*7k^^AsCr6M07@ zm}OFr5b5o21AYaPPO6;Ow)gBNu*9^mY zV`-$E7N}rQRj{Xyce+QZ?mn`A@0k_IPyzD#EHRQXKzgY*zrgDjd($gqZ0{h|MV&F) zoU7$L@+ZF)p&M++x>+xGqwak%brdt_hg^CbT~s=ov#9p8 zZ-UP{fCj0}%t(4)MD?Q>FBL;oNcjVGTcw$tRi=2R&*eS{FNwTHN#b>JoH4@IR!tnV z2?jz8MfZGY1C?C8{HQ~sNceQ(o2rE<1%%Tk4k@SCn7w!;s`tB}R5N~8M$S|`!ht&6 zLM~pWPcDx&vE-fU2Q2A-02+tPmVM)aL>>lg08N9ohzxzjx+?ys9aYw|d$B!T+ z^KEyZ#rElfwC~yd%);(9Bcn=xEJ5H01{b3c<1^G$r2rpyQy&9v$-MvB>_jOC%Fi{^ zi=Z%S&hYRgZvlbH8Rw_Z^o+c^tg=VmStBp5uDZD+;;OSD#qISox2mC$L*w2r#m1#< zpdjw4klIc8%KX*ApuI9xx_*moWJdYzC7DY%z`-$nXza}+IT6e^7K%mCS_9#ujgDLJ z$G9?=Bhq_pPw^!DZYIctJ}l_UVDj+AA=vzQ{0fGVijD0TZ>OOs?c~Fu-=H-@xS7O- zf-1|zS=mxh2`ZdfP*cVxQ%#{ZmJi}BK#y)j!nLER<|hGWVm=!j+=TuQKdvWdN3OFW zkO5ZK%e9&`Thr5)G$HB&Y|v03@2y?HNh&ZD} zfTlevT1HcM>%}GGNzrij_rKc=vjdY#^Lq4_QgIB-mpa*uAcj};_DIj|) zVrh9E*4HzcT~3H++|>3TVo-Q~`fL}>{hpyPvS7Twuod@q%;J|q-dZn~=kamI>?sO5=GE8hO@89{XYuA*Zt+37X zeR50q8ZrkJpr{gOpKHCg6Ev)2j`rFRfHp-6vvPP;nV!9!e46>dWpLI%8jS^b+WPOHVx}>UXmMKsk9WwERR%8j&uW0LehhC8H+(ulOpd1_UR`^K1WWbdS60j zECw&YLkUt-s0ru@qmwtBP|td@E?|}UrYI?EV*JyqOp)vgIdPXX)M)z&fdaen3`iR__jSQ&D;jS z%Y~s*&_Kt~tPhU?Gg!R9bPg}GtS7Gg>a6Z4HJYDL+Bn>F8JJ$&+ z&hYiRbPv~*K8$Mzz^a?(?ppepHbX)828I>#rgF&e(4(`V?U@{41YG-ZyN zToD)N>JNuhnF}F$v$5r^n8J-&F=Rj${yeD z%0sw&?iz1kR2tvDdGUA-Z|73p{$7h6XiET=6;86sE$1i4lejPiJI9E*ck4Y58%TWB 
z8=vr&Qw(QS05{5b1i5kv-$YH!KTF4#5AD~_Yi$hvEI|eJWmF}^lvtJ>T=oAjlJrO8WV4u43H7|x@z?rWpMuQn!EvkKo5;*PC11_5^FooEIkW{QzR=lm3n(r z+(#gtdv^$J*O1ZR{rtx{ba|aEjp=uG8d2?Yi0`ZPT!&G^YKHIVIu_-@RB%|0>r|jK z^jA;rmvN#u5`{Ml6>C{tDQ#4en%V-04$D;cgqkm>q@v5!Ld$0h4tGz}E=WJLYqS7x zIJ7;cXZ2^^&=N;v(C@d8R8=aXOF~rtWni$~dFxepx?4g%X#`E`Kv*|rG<+tW?r$-( z1|_5Z$I{oyDBipkE@69`I?n(EYko+@Q@y}T4ZCzA^+mMm$QG6uglp29kUZH)EUO!=U!F9QPf`=xSHzr(T91l!wP6UEzKTa?FQ4aH$1uWTsA`e9b zjbTnRws2)1MRKl`#TDpsw%fv<1DSn07|`B>DzL#b2u zyY|WZRCrsMTK0`yX&^e<5z$eH`jJN$%5M~dxm2j z!9U@$x&CV_f|qd%G*R8{cGb_c(~qWlh)O3^R+ByZ5vceMOw#Lh?U9L9&nHh=Tclw* zbsNdCx5H9jX;xY**$@Dc0g0}|pId0F2c^IcjvbW*4W{SJ-}bMnXEFQO0&8-3#OcO6 zoYQz480Lxwfl$3+svmp=@`YJWR7l~j%=r30)&@>V$F0wUGYz`iUrkOA3$xV>%}I_5 ze=ECgp1Bwws!}we?<6cXRFbAuCPDyPSvk6!^juPrMh9>Q?%n`{3v~*t(q0q z-ehI!`HGGhQPXXBvapeQijtoJ11A5TmgaN(^ST?fKexY^0g8Izj}^ z!m2d&CkmjrxF!K-Rprb27_mTkGe~0V+iHB@5UG3%V$v1sp%0qT&k>V5WBe!$LDkCo zHdhS)2fi@cCt+*P}6pnvw=Q%vbt<%Eb5 zC4@cWS2BLU|JISE(Om{ZAvY?vp@=K$d8$Uo1<3nI(LW>S#t?!vY0k$G%V|mR+ zm8K^#T>FX&vDPbokBcJ{Cst*9AW*gahLDP-XIuxZm_7Dt~$W5fCm|1S{p?t>XC{p;bT7O) z{|S)VMV>5NGIyIxc@E%Uq8UlwxCo>;XX0FhQp}k}_BI(b%+T8PjKWzyodQEC$Lcomq|?kpmFB9BR#uuzy%{x$Zptd% z2R=oG|0&NjKB5tD_|YL1S$|yB`z(87nVqF`t*%|8`Zr5d&tZ)p*5nnXM?aedp5Lf8ejK1Z! zLYX8<2d(#B%j;%W>c?3l{o$)6c*85FZj&E2dAtV|&oTdkjeQUF91{Pdi$ldXSWbi+ zNU!9>l|xUQ>g{?*0WlLlQN|;E^bV96vZjJSkInYw$f;cgX&YDRr=*=KkPE|C8HmTs zmbtNepi@`-SZCH9%DCU)IC%f^aT2g9`CmkxV|SQAw}xZewrw}IZQHh<7>#W^Y1G(e z)7W;>n2nRu_q=DVGaqKH`319Q_PwtwHRXt*wmdZ7c58}VN#?3&)ZvIjMJf2sk_5w4 z^ueJ)&YFthP55G1u|`UgWkG)mCxx&;h0~=8oyVBRRa2!B|W{V{QU`Db57INO6kZjnv|Ui!;L6cVne4t++~28d6$&m7c?1 zb5bG_l{$KrMaCCrz{=G0bGlYDyz1$Vg0ODVA3S_kiiVwxYTpk2)D2eUm~p}o=acx zLo+yoCdW}`RbWl7+khCb9c8yw7x8l?Q=y!jZRc-+)bG1rIzWD_>+Z-{`S_;f zPmmHR>8e%@-IJ4LxD*lYzGbe}HpTXPd4`QW7KYKw#~HRhxVPTRzk6Z)gaM=qu?!4l z&nPGx0o94p`E{szEmEgBG(w2pS(t9Usm~P473zkK*C%!B8Sho3*LO+P2%HZtRB993 z>yTq80h8LDi}SJI>%jhV3QbRoIMqm8W5!dgGf{V5jF)>L;imxYc98Jp;kbB82)#t} zj_dkU7vqHv!N~cZv&1x8PpaN;d7Hr%{_Zo+`@sxPTpRW=2(<~Jh9-FS62ENyWwAdR zwZuX8I)oA3GoqJXm1qz=1pPVYLbqBTYn4}E6M$vAq|Mm!C2-)A{g0qCs~@*U0g;~| zMJpt*ZHYV0vJ&)gT(SM)&gCW;k1kJenLdbPEout=2=oj6QuO&t$BO?|(UVzh;OPei> zM(6_B9j$u-E}+_uI>MGpxUDiRom^sR-A$IIz?N(=voSBk(BX_vsnl_F*x@>lH~DgC zh!{Sr`FM3Xw;{$F$1&U>b>MyzZ>byB0_{TsOxdytRu70x+E$N(WGtu>1DP451b>cz z2{8hT@<+|vtHk$~&}uVNwSAs%{;T*qt&Jsw1Ec8f0AY{a?5qy-ui9F<;2Y3ym^O5R z!ufXIE=Ub&dNDuZ=q~M%sLGnOb`?K-j9PM*B46LCraW@7rMK_SiYNb41{7&6ICz)^ zJ{C~zf0y?nQdJ9B+;&BvSM|SzB+6-WR8$<2AU*lO(F!y@#l4XblIn-K5wSY|7=TC~ zDox0v0*)!S&;{W;3(iQ+g(iP68LT~!I3zqSQ89qx{;P-1wvbijv$eia*%aRwOr@Ap z1_tEMWGT#LA=K?A(W9=BX(0KPeXE<5sD{ygIoy1xn&~b#;_vPNUov6Msg@t*|R*kX=7M(2>pMkt(^ z7UbbV4lY&GtI}HIJNDB@+UMT)H-BGz0JZ%;qa2*s4y$wce(9e%-kv6zaXMoP{8%Km z%6DLVo6jngGg|*sJE?k>%G37A81Cx=@0dmwiUhNi*NXGj1$!)cyMNq+#EdMmwUPD@ z4LVUxUeEd|PHd+FN1P|~G^?e$Y4+_2XC1SgNy2J|#OPfOC8jZl^i_au+xLL3CS!sJ-@$LugXC1kY$eiPF%At2AryV1ioNaRBK-S>bRViz zZ&?mZ?#0sjt$g{u#9A8bX6VdT9WXRyLtkPOTyg8-9;QpcYXYf(bv z7Z#IEyWU;8r_yOxO;cz8uIz2zGQa*dLuJ3~xj*6M{yR z{BGBnS96aSCbo9;Npx4BrbO~Nb>^2t$aP4DR#JO5Fp@5qqnA@JY1-RIn!PsD(wZ`B zszs)`b>egsK;RwoG2us912lxnqZWPiewUmt-!MvkG@`=m*p%O8oHqxqS{hhR^=xVH zhi398PWNRV`YRhNUga*$abz0^uM7WaKjrEI3_a}>9+et{+Yp}GM&LX|;0FZy!*51) zFuyLiiAP0ghy=AeyhRl(G`i(+?XvFz!OSXoV?oLx8Top*-FGW$0nr$7YhEe}97fHS z7ufNvIe~)kUrc3r@9wTE2&oz>wQ5e;T#x=NKAAbm;rW0$xy<%rILbMq> z0jEh=PN?lDf@;*Q3!d6$ukq!@KKm^zh4pynaQ6W2uA#f)_YLxW8o>%1--$eQsvJk( zF$ziU3at%&e{@M|AeQ%t4YH8zH(ZkUbgA%WdkHP-lTCS*_w-;{ikE6`R^7^}*C391 zrQkfV&N!;J>%&HW$-|>vJ&sThrD7GSj#=`IV=q@Nvyn^BM!;Uo0zc1&efTf>*_iI4 zNhiX!8kuoawf)db0l{tmK+%PU__W`laXM-Xspkf@`H-`^q*C9$%0kfIs~4LkGF*yh 
zmh65q*ij%EMtm2u`~vTXCEL#cZH2}J{nxF;gv4lbLW7z&0psEM?@7xYidY+r_rG(T zs9bEk|KIB5Wo2dkPpcCXf`yfvHAAEef(B@VRL}W?#S|;%ViO}ayAMG#vbEJB0R{n+ zynA(pL9ZBt=@29461nd<($n6h&&O_MIsNk0a%G@H|B`>{*EbWmg3U*#fvCyKo(Zds z2jyXUWDI@>PLaCE>kT4If{l+d9<3-<0qgP;^ovrmVgZVuN7PvP6Tp?`2@+9MVgqCn z7gF-j5a1HMT0xziL3y@s+-`2bgFzHY60e0KSfB%gD8?a8p_EO*$A=1yrQG>oGxNH@ zhX?2X(EbL9KoQDgL68ImiPD8OK}}nYp`&PF;FQ65bwV5p*s??HLGW1AU{)tS`b1e^ zboqGKZkw9AIy-*`+Y_!CBVi53G5{c2Q7M$dgcmTL&Y(GlN?`a=v^ZzahkFQ|;QS!@$>1D^7@%~X#-|0N-vPV^kk4$ypwPI3 zYiGdc3kf3la}R-yGkk45xbO<460)Db6dbBjn*Jwm2X7;YuyWH*1PQ*>Dv%;H4hhi+ znk9hyuEGmL!dMOk@{RLZ%P+nh35B0pic2c$_chiU5b2Zs6Jt>x>>tbU-#A_ISDg~t zJ_uI;UKjsc0ks`sc!%h>FR){j%B+`bs;S(|)s{jOKdXk^LnA6BvIoE%%a|I38wKWY z?*J<364aPmP44f`9_Q9&KLF712F4Wd-TZA1T?3se$OiEfn;K9OI?azK!Di{77{I-_ zd~q59$lZ~Uf^dfyVU59BgM{b4qCY91a=xMqkaj@#p}gV!_lclEp5N}DQU=g_ri_*G zpMa?^9wSd$4_-({mt!IRPd#cTBsXyHpRO*T+PodXAiXP_Ao(l)0Lb-Md@)?ey9VS9 zu@-M7FGBy5Ril#HtI_}ykS8=`)HML}e62m%Ks_G9c@f;MI?!{IDL& zGfFY#!Okn`e%kyn49C+gC|q_v_prjK5bT@_5@f&o>Br-w_2XyEJ1%Zb`Dyt|Pftc` zALqW;GnP-M(%$Y#NmVc&cgv4Cn|?~bNycALwlSbaDC{Yh7kLZlxj%Bxzs0mVvb;Zxt*l} zwXfr{0&ckVq)7J?aZKIAa>a)HSiKzOo?1LyhFfT&99potPl=e7rPBM}cWaS#xu|I3 zWsnA+TR-tB_{hVPQ-t^RSDlgT9-Z7R4$Hgs>;wka(#}CbKJS{5$D4-+uXOb>iDdI* zSHV59FLi|@n^7HUrww^5u6Pbnej9s$WHbS*zZDUs`+Iisvbt^$;qo-Ida{Iekxl`T z$4Z8cEXTL_;(3Mz1DW23)`r%TU#TP1n?t_1URN(i`36($7qZWE;@`Z#USA?TUpez8 z^6pnYH?ZX$Ytqf)_GFY8cft1pp>Ygh zHX1lIGG2=fn~awai{~6tkVH+^nu@B~4~4kQKOb$fOA>oK2FzHxNK@2KLeVkJ8S*=S z1sKDU*)2uMwm;OEep-X#WvdAlDxwJdOuPMM{y>RuR5eUW>%ff`Dn@zg`bGsw3XD-% zG}- zV+{SEsDDfm$Jfm@I7DeWE_HQ3r7UaxJ>i9k)5^ip!%(R9dte#c8|NebN`WCwAOoLd zj)G6&s&KjPWWBma-xnY^_ho(~t8)p{x6}y-jBmel4#siT~XseyfT*-wpe6p zKYjMI8F7)y3eNp-?P+%`?p^T3 z{tbH$E`9&HWQe<2h>!6!9tjdV%0!%_LwTxlg3`SY({HP7G?C*h$i1LDDcrc?-1@eq z%9b3zC?lXmr_(UTS+v<>uMo&FTBoDusvpq!e*KET9r??^4^QlNc;uv8YhoqvW^D0OPJnYIH6`$8cp%%0G7FN8cQdEN>LI)B+p*cxNEXKwaRbvxZMp0i!&jt+Fn zFH@NAv>Pki<06;yXDY1uBA0%1(z{nyQTp`pEu}3?M#i+w?^dTi$L%!N1efImaa;<4T)3kPcB`2K;)5tH)Xvzxk2Yvm^q^xIu zl-%}@AKAj)z&o2Mi*~MX-fH~lPZLl`WWNdvjR9-ghkt~oe+WxCIW|+)7IU^_8|Q*5 zyT6qV@=L#4d26PEO;1K-DXv+%91Ey!;q$2;NnAWnPBQDXI&(iW1o;Bila7L3g@@$i zWX7juH1yzkLRHlYZ!SHVq}WFevorOZL!#`sCX5+2DTYFl+ zF5Fb^*!MFr|8>#$HMf7MB)a8__0Jaluzq>F1&;$6&ttkq-G%WQun!Ws(<9LA;Nhh>FXcfJ2fL*u4 zr>j|2Vrz4r@^iLGW6C>;%ZZ#h+kjMSh^nn9(XLdEIsN5c!)qhlzxydh&nSm2!D&qC z5~IzC2i_z4I<(^5j}R->WwO?IS=!Wu^Kq#v)eIzwcqnA$O!&GDUf|EcntAVz=5|26 zDT9{Nzxj_c zoCUc~6ukV5BwkiNnr)c;`< zxu=G#BdgbX&2!Z_PDjEkfFswHNVR)Ob+J&|)*l801U4yrXuDlXz5C;KYT&CdkMV68 zR6@ERFwnxNwLD`+z#%>XGZ~fZOBH4{T@Z1DkW!}qN6_AM>s&+-zlmaT9=}Y8YTJ}b zQ2Z%zBD-9h=&cI%G!w!Ox^<)@mkfQjB;nA^Vi_9zJ0)^Hy?iAiVC|$ftxn!z$g~DK z_Yrk$r3Mt8FeP{BuL^DaQ1^ezT@Cken7+v&-~nROFfix(xOBuzW%1e9%f*iwJ&p^* zKpbkEJV|96(eH@FTGq7%1%2OfycEWZEhlVhUT!?Lbh7^hnbQ>LgD3N>dru$fc}BO( z(febf>Q=ksh&;9dniPIY>5<_KIr<1scNxj#OeT-r>dlC?yvMm_N%+ji4+@)*l|jx| zuHX3XdColMc(%|t+mkYaC9zY~e%Iv641zxeOSwHe(XlVA%)q1G2tb1c4V)lmK|Q1m z6Qi|+`pZU+>OM{#N2gh!+kNDB7?-ufup}> z9mUAcRR&2OIA8+4ScP{X3?JlcN#ot2xzDT9Z#QGb#_G?DwwTN-gJ%ja7i8s^UT*fe!&KD>pM=TjVAzu2%VnVBsxL z(1r+<0bD6)0{h+fm@$*n(iAnn7bR8_j@AkjGlfKg zZkdDI-|$p}>*PR1g_3M)iLhIc3_-Vu-!iZIw^={+dn5?c@~cpd;Cxod`}zB*r1(Dqh5+IAgx8hq?BHM=iw zVB~!BIVmn6DGP~GCH@>Tg*1#m#g6w`tXJDonO>^D`0LNPtnmxe&sY_TeZ`dum2)8x zZGul|wq{|8OCIqDi|B5atK=Td_b}s#`ZrqmLMr6RTtUc}0tzHs=NZ|w! 
zbVJV@gkfK32^nMhiVFECK=sqE_OOo*K+o90GN%yq=xv@*I8=F?HU(FoN9qwn*1D`9 z86L98tm{?k@VW99Zs|u#=vc?AEcfOMvgcxaAxxxHeO1Kt5!}9{9&}@D7Cy9P($|?Zo@DR!8to3;!EfGY>0%WqUgbQFgE4X$?OcD$Jop=!$>PZY$Gh zLyNQ4)5;~S`55H%HACnqYz3Rt|%1HOr?gU-hB$5({9@ z=VfymvXnCynBFCHDKus#v=o0}-!Yvq1o2NSPqONk$h2Lv;t^eJoj2DROsE2LBd9Zv znhg%-yPaP3uBs^knv5k?BpCN42C=VT7LgR)@?9G9^Kx%n~?ZX>*Z)p_^b?lOs zcM){LjkI^oSU0V0{FUB&MGE~i&h$_;fO5F2l42G65xoETyCRv}#T;EZdT6?AIYaU? zdTYu&O*G?d>8Y|cERHzCgLec7nU+&qxQ~#YAN+ z{NPioMq|cKHArW>x6cJO_eNf#Q}@gyY8{e`AbYNFFuI2ne_APKg>EwS8HmpP%Ln>c z)s7rjb{o|5C0*rYzt;_|es)SK4>PBG`xVJ~j&qAg^BC4djijQs6OFz%%H2tLJ_w#&U8@xd z{igdir6iUhM7IL{Gl$xa^9SLUMAC0%7uOUsazfdO%{qtQ0QdN0ie|>@Um0QMCg@;; z_d)GKGKQYNldFJd*{nnqhIvM3;G7ysxC$&SwlY*ocl{TIVoewE%eEDC7L>t6m@v8Y z#V(s_c2vftE$R3CA$5u{s=P!xQ$u~2viFN3Kab_0n?SzQK4hH;Y+p4>n}YVv7}tta zIc0&OR%)jZ{?xxq%IpE%cuhl-@8%HQl~R6&vk%`7gp&X$V~`BB9LE$IP5l91RwBX7 zG~VSl38LNf06Cfs`N5F)vOJm)*7^vOlLI|YLZD+1i*F+)NV zJ0GW>1Bc6zhuztWYR>r4P(ZSiVN4|9#uFB6MZ4ZfHu}>vE9AwSc|cQzs-MfL`3!#H z`s|s1tP4}$u3-B;V=s1pawj)dzoxr0G^htgLJS}g27jq&%xVQCZz<-pwA**rSwq%_ zxJ_^{yIxCfroHUNe?m1&Ak5TEhjxn=~3K@?wMNGHH^mkzx*Iu91ed304N z3J&~Q*eN7+u|V0$tZ$RNh*ecdZ4QfYps_|qY*s5sF_F~(*$GJvfMCckm-9S`^UKN- zHSzMrt{3kC5-LfRjFsF?b1K!+lSyUS`Ou#Vi7$#?^?%?X(jFd2wuvZU2Gtw1BNdx4 z??=V8i+!BxH2U>d-$!SaE5bn^&HugRkN}!VUk{B}akdPi3OhxDdDDKJ_LjUm|0)ze zk$|^gpFe*uK!%-OTTw-yK5;F!5`#8m{W1Ec&Y)7l-AF&ZnZ1O%bg&)K5$Ii%h}*#pzJ~{nv+q%eg%G4eQdBDfb5e&-9Hap<{9!T%8F$Bvd_ez{2JAb_kmApuW z)5+mHPmhfHCyX^-*H4g#NKkU|Rv4h3m-ky6bkIYskFOnVD<(VB%h>dCUU4kT&NSTq zl`Kmw#J%4OV$!66hd0^EFO*ursl}>%xpHLOZFnrvG0dEO;8zr}^8sTxJR!qg^glPhxaG-n=Q8bRQht`Xl;roqe>)3OJ|^KmzzP!dOmq z2b>@P=qzZRK1Yt1e%?!NqF*XS-utUNBH;XI9#*kX>&{y~*CB}}9ds*&AKZY={AY)@ zzeQ1votwc{GfcjwIDT0&_$9`%wkn)Ldcg2sKeUUh1YCJEFOPM8yv^E2pJh*Kkx( z;Ca7FP?Fe=+NoT#2p%h0Mbl~nC+WJ*JQ347CYx#Sh?0RS+W%Ajstj%`;-);T<@zkQ z(qOy8G>Pgi&Owm9X_;JKADXGFKqn%n(tBgV0x1u=wg4_RK3}Cp0Ptt|Xvii-9qCW> zz#=^(nbPVy31lPc2TOU?Kix`T&*9Erg4eE_qb%B@jIu*{-DA7{k_u|_q6LwQ8)odr@p6O^F1=gyAm;tlZ!=Am9 zy!Rg*W@GweH%i2WiE}F<{>n6wF|-@SiS906;bsf6yyj=DF=Q5v>(We|6)9S?EJ+^| zX_KVUK!O%gjcF|P^6+}`_VIh9Qhs+W=HIAjHxJBydy&FlAb@IKp&V1;fqPo)nOJK# z+LS$ZPB3X>n-RTMq-P_mk2N>Pxb`RETn=pcBL}-eJf7|bwnosKVqg=*125DI$@+1u z5yM2)rW0m%YFjByHHMDv1ua%%RE_9Vn(ubXd$o2Zmzu$}^T7F3 zn*ddM<_XTvQlPVI>pV(5!EY>zm#D|4|Ln;y&jusE@^6~dt?tzSff}}dk!umMnetpu0O;^f?>8xham-2-8nSimdk(`S}JK}P03PtJ8^iUv? 
zGu-!Yv?g3WY4^ow>`1Kjvo%&=fB%jrnO%P*D71``hA?b(-}7pRUCKavcInCyny&8A zt*bI6Ut@E%PV1ak))_&I)cqDKMEjnU)&;A-(ok=KF%@8H+Rc3HeY~Nudzq}2iY0s% z-{B~9pbgl;S$R~IXHV4+GB>k~kDX17Dm5x~HHYlDJe4w1`>7bX->6f&lo3602fZ(7 z*iHHfNAC2%MOLu{b?+)o=nWtrS_-<)<8OG%ygEbB!M0_jfl(XA1|<@D{R;Q~ePHJv|SEaxfQHKpv#r5sEQ)xCBdZ8eA6)8ne{<<5!e z-<(%*2p!=DW$1D=0`zh(C}nOuGLaGQ&XedHaUMIKDQ98-GAl?P8d3^^os1o0Klfzi z2?FKLdXbE$3pQj1G}98r!L|ksQoeo##K}h$MQu^sFGe?Nzt`xSMbx;kiL7C?&>YPX zF1*BcQ1s_~1Qs{g@0u?-@XfYK!`k^QZlWN~s?3Yf3&%<0t2|EeQ&f0HqmVE+opKH( zWjCl)-9Y#Q5~im8MTF8*f1VYM_V?tNU@Y2Z<@<*jPpg zb@N$6Ch+YOJ&%;!D*JMZ2;s~LrDfP#V+p%HG^^-&Mp!`|7L|z|8!uWL=P`B04!}cS z;DFo)zNba&7lZVGI^s#(*Dcq~m2W1p60tPzqUrlVdV#``92Vw*qWBD~xu2OXZ^gVK zJwA2J^W^w_!p+(}`)xCO4o8rnEMd)sn{tK{b@ERU^1gOC&R!4ql=@j68GN-6c6|v{Ibe?RZ{T&&N?H<*?D3B)w=QVyL}JZJ)DULBXt}WO z4G0T#XxlCxc!P;{V#FaE%Cc7IV_(|#~ylbT-9~c*(Fu(z7u=9(DwI<@I+L% z(6C3SS!Cs-_}~FQomhm)>I$M#?f@@2&@6Jlj`F^d-Vy2wEJs7grrx#FCn^cR?;nD(|A zlR~42__bwUa1u1dlxG=R4Akh{CUH6?N2k;_qKR_ND2!DWt=}y{iGys~#v=i?%vix6 zYDPS|_!L-H!`2ap{ft%iNTA9gK^QaULRuqw0rN+`nV)5GSo=ncyRK6w?KexLK?gaP z9>Vi@C;vE`!aB4b6f@^oFEsDv#AdBqE_Yn?t7ewHyGPw`*pKxEeuEO5&NoEC43HkHy?13eYIC-9q&5J3#q*I-GsCU>GE8m=te%A7Z{BA*uQf@emQM$i{>;qYU%j_TFb$ zmeDCPGV-m=tl}6hRDIOGCJj}M|NAO)DrCrVGHc{?)`KlWuEqB|c&q!k@K`k6RPn2d zf;gUiM=rmzcFfcw58&v{+iILi0!5U2Yk1#&3xSTH_-waO~Y37#39LhF|0K|K@-5WSnCL6G^fTvA#EH) z${`5Pg9WXG8G&t*iJv2i=esdATtX%-uvETg{2lv-SK0esc1AO`i~D_8g{%8C%Zp_& z{#;W$9yArc#$sKMd!~OPkSsDm4sI2LuBaN5OfQlj zj0w6_oKn)ehpR{=CSpj2v$2Yj^&}%4rB^fZ!>aRRoD9l&s(=vsKuA&$TsM#SS$x7c z%+AO=ss(5=#nSI)w>q100vF4R;euwpY{}c_QsU%edHYF+^-B#(l37^gTiTjoSn-4S z?%LT;1YII`c4cqlMzML?zULX!IugT>(xQ4_eg{7iAQ;5EM1Taw$~2ye@WhmczXGt;F@c`XJD==25ki3?-&VDX-w_5~rle~BnB zQb3I+F-B?ftaq%!>AqnGd_QCexj8BK)xy{vn;Fc$@4;Fw*D}tPEMTCk1&*2NA^2S^2#kL8?f+$-Oh>$Uq9dw-V@R&lb>e*PVo~ zyoBO!?>Q#5m0(g51*@+s%>IURG8{&Yhyrxzs&JBI_jL>h zgK0}*(JVv7A=x~pBqq#~l+v4cI^ef<-XAE6+G?HcPqc8sKJ4M~j{XAdb?l6;qgi;FW# zCM)|B&91yOkUjS0$4EWCc6!;;R}E0obkYlQ8BZk3;H~|OK2tkn&YM#O- znU~QWBARgY`*F^aLAQeh)$e0DcawxkE}KtwWdvnOX+`$wdAK)?wI*4(WMRO5XS6}c z5qTD$rto>tEjxNWukm!h#A!+fo}3QIA-EJ?m@YEVF_X zErZE6&7K@_M)Qr@%N+k3sb-uSv|}sSWNkH*p73K&`uyp|THB9TSKaQ>8B4OjVEL$Ur#c+r=Q?At?Z<%O^*#w`**ydeY!#PFyK3c6t8r3c76x^V!2^&w&m+=g4y z>CZ0Wdv)*NMG_jSf)}*j77Av?s+iUgM%ob*{ow1!8Iu|td=>h(N1$B7AeD_|(d30L zqSC6VZ0Go9GJN0CJ^Dy7h_wQ|`2&pRY+cxmEeQGB1N17%w28=mhVTHwJ2itK`Z>i4 zM47q&<5bG@p8X(F=nM?3+ZVK~@SO60{16ymy#EzGw6Q-zoe+WX{+E~1#)1KTL=49J zzs?AXHhvChb4D;;p02^lE~9Vi4CM6mE}9|o#4Y*0uX{~}W11hleP z|3F>Xk2@G_W{Sma(D2@q`g#x~6l!5%Kwm%gq>Wd288_fGIlY^dafWB7afjD5ZydzrxaY7j0 zAOIFLNk8>XTbEERz#B3M;BN#8!&qVCw_{^CA_obZ7zec`LlIA+kAOD>Z+xgW2X~Oo z{^h#jQqGKH)ZOzsikgt3gNC*(mf47e@d~4(&?AL94#6Y6Z^yw#gQ9g(pNp>I@2(Vkfva`c5i_j$2 zZ({gW`Q^ykSDu|**GlwMK>Nj*nj+>6=I;$T0@}S#CLHu{!3e9X<{^peOC6xYk zcLz|ueo2-dLWKe!>2B{uH(yB`JYpt>fNf#cd4#A^FcsK0_?s^cPN*-|&TM1;%AETz zSq&AgQ=5Noh!Wi2^f!sf&o{^fXVnmo5F3AlRY)R*wC|xEpY67dm3g$G_uxKSOlCEO zcfZDG#h_e8olvznIDwe{Q{30jC#@QxVQ1!??^G6o1nqpTutAXzC~eJlzslVx6Z9j8mvDn&JSHH!3>cB< zu3+{^272r{ai98`oKAkNMDld%vV?sw_kQYH0Rw}kz|aja!o>8>9s5k}(QIZ#^S*;x z68!aB0Ve>J!2auP?S4lUu9&h;nZxeu$1!jU0=SBi&2`;AZ5DUjt-L&uL4|%BAQ{Y( zfske!zBQ2krBlX`2Yr2pay4kG=l#Rx3El)_W6_Qn8nIhzvLJs_XQQWwJF%GEfPjzd zkKEYfhyCikjFjZA|4&&iII%`Q9mfLhitG2vA)+bk$ifz4R0UWgs*RWhi*_@Ic!%kH zV?e;?MZa0iy2qdKy85@rmb^dyE&>e?sfBF0SWoJ2+l|p!4hvHq@y8rN(zxCj4kZtE zWln;Q!Pqd#g2@)~xm-WS9I1*&;Nz9Gq=H}KltrtjM)c5uh8Hos4Nq-Z*q<5ELO(*c zhaM^?_a@rkMU{bFq1TOOLm<87^nJwJ5m0i|uTRvg?Me6Fb-1(&^>AE**^{aIG5sRu zTa)G<>K3@~hj_b(B#=hS;Q9m86Nmbv$`4bt;eyGYP7=iNpOQ50n5VembWt7Lt(6Jx zTBjlLX#KQ@{XC<79|D3gxzyOSXJ>mn`m`U%0E%R^DI*~n;Vqlc>J5B 
zywJqX12;0zd$oy3`e3GX{1#n5CTvQjBMcC)>jotuY;4`Y8r*)Wt5Ai9!EvMD=g3t9 z)10|=k^vbW;t254cxOneT0yzAE{AEfH53ue54AOmuhN)LdLK2+Z+*J;+@~1yLeer9 zg9cKkzqkX@9g-3Ze@6YSz6NGmR~Tap=4l~pxb}Nl0@{_mG1i6C?eN%HGwq5?+Us2? z+wCb=z26+nSVbu(B9(*I@6iz@cCh^XV({af*h8K#4l5d$1E6}ezDGwwcG<+S9%PPT z+eiPZYIu|7R#$rG9A|kXfSANh`)Q91w;liN5;+icG{XP_`QnyG@C=YXTqVMavv8Xq z8b%!dz51~2Q_JM|7G?JddH>Xw{^@IBhebJEL|ycBF`yqL3Kte%4+A2 zv#fYZOBs*su_^jHF#aV$Oeboj*(&9R6A1=0aiiV%z`mmRe>8$&sepFx@wC6Zb5EoT z-_vsqJ_(=VGg0qzM`LV8p%67M39>FZ(spOVx)P2(R!f$)5vE?c#uWaAe-~N!ajMn5o*G6(XWKQ{+|Te*b}Ke`TcKkx86b_>SGWi7gAqtzC;1$`KA`-Qx~F7=quz zD`p$@E2#Luu0R`*$wv>PK|lJ4UAYew3v`EAdw|7np-^*vc73;|T%l%kkYTbhwwu2` zdCSjY&=P^^dQVz;S1r?Udhzavg^W^RD1uX@e@*?CX?dCc8uKesab0z>Yo&}WB_im# zn3*NNo)2bZ+~;h`pj#c4kgSw3Q0v|9Z1|QGC$TOw`tx zCjM@K?{)$lJ1pNf!jxbeHC<3f^TBJ*eI||ru|P^(Y*pwJ=&UVTrmQ_J+PT}e%AeCl zZX+HbkWr7XiWupj(-Y5x>Cn7NJ_dUhs!_C@+!}NRG>MI)%bI?bp1aD{s^1B5 zR{;R8VD+zk`H$cyju<~%B=fVh#czwb1$luniJP)+EN7G1TIG1i-YYOq-{U`c%t~z^ zN^#1J7BjrY6kqMqU@5tHr5bE*?vX1Lbw^vuuf5XBi5Z7*8)pgSDvRR8GuI&f?^FmP%h`oUC#Pg?hTq?bY6StETPYO|49({mU!kdo0 zA=g2q_=v9Z!-*HW_HFjJ4YhYCC7jAum4%r`CXN1jQ7iXDJFcyk-g`|kES2_`Lq9}&4= zzxr*I9TQYh<-Hh(=Kj}QT@MKs$q?}1VXR=2k;Kvbakb@qUEhR+bT}SnYmPM7IOweP5w^L&S`nQP1nqSiyP;X=dbb6tl3~jyFJh*&f&j?T+%zjj zV#o71^4ePf9JcG7LGZ_0sJlYs(UVw+vHPkRfr+Nro{5oBihmmunSG{Qm#IW}C2qI3%&xc0Vp?A~GD>L3ipYPZ8K$D|F@HLpi|7Q9l4OK;SF0MoBbdiGEEp z@q1Tn4FQ3Cmmn`KuI;*7{bU`EL*;rnm%ZBTDW8=eFV!ORh$09O!tF5lW-^;By_|-{ zU%}cfT-@5@)oxP-0x)*6F|TGg2c*7~i8Q4A2{PSgxu|gBle`1pbTcVQbQsSx8D3V) z6Yg91rPQm+{XYPRKzF}Uuh73|I)$};b-HjjTwjSF6f#Ept*j<+5i)SwyT}IcRPuC4 z`8t*g3X{qA1&OBrm>`PZORlI*nWiW8=iB60Dx#z7Ya9UPCZqH+7Mmh}F!`A2J-|c` zv^4D`Isvc2E0fMHhu_u05H&KDRZGc%H&**wvY-5g3YT;#H{0bHjgFtO?29YrHc`bo zN)bj{NCQeYGJQO7Dy;4Z3!UaGeYYQPuYQ;*m$tEzz9U;ydUvt)>AR8rfjjpX-p`Lc zti_9949klh{Vq_h1G)EqnV{^5XHvgATjY4T1&97MGOy4)?rVp;^*5rH+Nz6})W_}q zo~OzzH`#O{`CB7=emRZ9w+9pJi>SYhv?&dxC4wreUd07T-Fw>sVWPfupKGACgb$a0 z-2?YSxSm)Ohs?}O;XLv>%g}Nown@a>$}ei|<}2vg!$fJ-E0z zyJ+aWqeabK_>qvwW3?P_=^2IH04c4r=eJsI;lvCteV$U=nsPj5o)P2Ph-~W{aw_r? zIzuc)oMC(c1RRB{EO!#eD0nP2o>}At5Vg|9Qf}M5_EC&~r|i$~iu4Gap;=5L<71&T zT3mOaMP;0LJFz%9u~&|OOl z$F^N8qU3iux>bAhn%*GcX`9lR7`D~*ge+NnJA9j5_cPv2uCscy51Y5-qlxw`+(75U zWpk+L;0RTJ&349{19mQ|tEVOb2Djjcs7G`YQBH^a_|nCK&NQ4^10#^*#){pf#iiUc zuLm5<71pe?J|%Rdg-k8Fj2ZkuHrEBu#$(93Tq$?i1Tu0@abeFKH8yLntxo?^_YKQW zmUmJVwBLfJGNiZcWQ0JRq!vOYw8Gf&w2=3LhG#z_IL&8J&nnkJTbFzxU z%%Ksjw96r$*H}s;h0Z*yl2mh0XK#;O81Bh`%t?;2`m~Ni^)sy+^9F3yeygn)0rL3! z22|ejpCAP51dqm(6QeEzZ&^P?Ka~{)Ce_$mt`@(tSo`S_;NA(soArNE6hOA5CFlgn zwK+n#sfh|@`jc+e9J()VwIBPd&l&q`)TiH!Zq(B z0+0cksK;JX6`@?qfwuBkZO<%;TT$I~vdF3&7<=7WcR|wS(XA)Pr_JWb_0|3l33vwCQS!onm&!={ zB}3c~q{4$NF<~iDY`&E-we1o3&yv0>m#j>SWuH137X`Pm$PC*z+^ z`MJfdm;)vi?7{E8GZau|+@b7zWLWraN^4F`17YVl=Q>3qL>l&(DD43q?LqPwirpm1 zj#8XkQM77bYXuiS%`QsJr`zH%%DFL2fQ6*`A#J!OZjyiY5mk!@Dr+3&V$VFsE4R_Dze2+esl(W>0(JoF9 zI&bHayDd~5>;`YfVyRzH?IGE^$+ptQ1z%f#CcK}@@_xZO$d6Z?QkVZj{;Fm*I3P=U zx}&#m>r-XzEk@EEhZQV~hyvUFc7m(T=B-kzEx|`)XG6(7+}NalJ1F|jl1lTDjH(wj zYB|@%+A?{pnsKT@^P9w|g4$_!+~ClOC$x?Hw##E(hnhx8l>I>An_+Gz)m1@b@yC#4 zzv6lT>FC*qx?>?!SY7E&Q;uUvXhD}Rb79Te@%PwC!tCBYbINgz2#0LUXw$ot)tu7^ zVNoZ_h3kX|tp0d^D)CP+c*;D7eUDu$Ts?R5Rk9u_sgo_QgiYfN?K%|RZZs1az4si$ znP+~?L7&&y>~0JR*09<=_WH`xeyO6BFCJQ}nbkJ15{Yn@Gi~7oos|%eF00XSMPLMz z`n*v1=m4|j>e#vc(`#{H6-2RzL(+v=>bf5Sn&L}$Qg+>cosb{c4qT2P&^|xdw!Yhs zU+5B_>eH}Lq$)x!pR9f*kHzm%ndwY{SNf%2|1(Ru?gitRyvA|vQCtI+XQ~Imx3&U? 
z=SmIp={Rh}&zM726cZi~h5FV>T4UeoCUU=EJ-a_!VPL)sn#8r(3j$+CFe|4R(mK9e zD8&u&ir7bgxqD7XxY_B(QMs9USy8+*uwzard1K4H-k}|8F%7Hm{KhWgakwmE@=l(# zT|u2u8;_fo!~8Z zB0}$y_Uny)KoTT3}Pxtm3iJxG>1`Cvb%{fg7r`YMq?pSklKbP@|mBR5d ze%!wQV&(DAxQcZznYHelM#IIQ_`m;B=u(mF1^FcJKk|~Z6)lD_v3bTc?c>N4hkf2l z!soTi!Jcb%TiJSo(K&><#+>SZ%I@~P-A7YoXa8J1ss%Rb%0QkJNALwl8$w(H`$t>cj44J9eJ{Vrgi- z@!V-JNaq8Vp0Vn934S7z(xgU^b5igIs)2fqzU4`L zqBWcm9BX*oC-7qGV*>M?c3tD9*TLFXKgQx7a%p>CH1UBNjvPUWrKW6`3ty(EN62G; zcDAIpMein>-b<@FIs3T3#Um>W?^j(=ib&$mkiHy@NLwVfuyN(9(c-_BpJ>J;K~aCX zQhBZ{4D|E^GuVpbr|8qLooAHcZw3Nak!{~f)e0F^0^)E2k1@60K53V>e8~PiW5c$% zuX*JDdv*!+Pu3nTpb-P|6DRT zIj0en_hvpJ_RE(EO~`8~s~>J5iSxrQAI2n^BZhSQ`$kZ*(!72|>i}=?-SlZR+Hbh3&vB$bu*#Ce=0mwbYmA`&3oer5$+Bq2Tf!Y`e(jH?{ue z8~Ma7r=Ga2@1;d1I^Gd2XkE{LP2kENi$sz~#-5J*W|FanVK3=}+!O~MnGrmzfHV3s zM(HxB6w2!jZeb9d^UxB|R}04aGSv2imr{U4X()OcZo_jJ;RLCf6_x8q1&c4Oi&@C` zh9lD-?{Q5H&PvPik<||KI}oKdsj9o@;43(DARz2ze{nx_C9_S1rIa;)tptB|?;_n$ z+3|i&=e@xVd{sd@o#7%f@s2wqq|&uDXtZ>?Gi z$vM67)C}=4?)hC4CYBV#2qD#rva3__ipyrpZ(F%Jq8cUGpA6}Pb=ORRO&+q%vtJV! zSfhgEvo_}4`Tp1h?XTE8wHg4v>OQP{4{Ra*klN<^c`9Q*%zoZ zRGlbNV0Sp@bcx3;jWx@WjzYe*6m84A*fApjCHLFDoy1LvYZfarpAFkyua+RJOnm(V z%Xx{UKVN6b+wHB=iLSqA*um#g1?hmUifC9~w*r_`Rb40G@Qnz6)yK2XP2hZRO^J1! zgvIpPPFV5kCIP(a=Gg1&m+xE_NmfSG7h;spfdVebay&?hXlRjj@NIKP<;b-Ym=(u2 zSzKZ_9TMgPz+V9dc*TSeWC9`Kem?*s{0!&g<=7IM9f zi8g03!8P*{ZHDBy-?cw4_AbmVti!1ftv`$ga&&EmGiG*<=i~=Vcu+o|o>~lK-h}F9 z`TWSaM_h*yvMw#1@ZPcY$0_J6LADnu^)z}h@jY9g+Z0uQev$jmnDf};=3V|RnVh?ACW z_D~{)UP)4RN!xCn^CkvymH6=1PzyI}K-*2T6({faVZE2paUHDHPc-^bnjOc57Y8-j zQ4aARs68To&o$%+!xhD8AY0R1bQ>fB&4N_3VRbme#^D9c8wc;H`31?HI`G8;_@{0O+g^v6-mWXZ36 zm0xbjJTv6Hp90}SVqwor0@>ijUaQ19qRa=AX^X18E^-V$GyQ7RPgjX}w|wFASdx0Z z#Pf82UnXZ=IK@9_rt^$=@sw~i>DIgoNto{&5EjpJ*qM4YwtttZLc{1u>5=j}2Wj7a(s+ z>k{Hdb|>Z;u!zwu^>!rI-0X5hRj|+A(U=y0F=AsSFB|(h#Xn1ZNjtn0=wAL9k^t09 z*(6`Nl~8eh#K=I!P^$fIK8ZO;T-^!ZyI`!wT>|5Hn;Pmd{*osz5`IcU6UAcJrC&ks z%+x2VM_??6H8s{QImT&{akVMGFlXX?K^-J|OIwnJH!T04l~BcEY58Lm)|vZ{C3Oma z@1he%oczX`vX)GM2HQ-qGKeX0p4PJ2Mc@;SaAbrqTga{RL?BfTwp%NHN-@qJIff${ zEQmvsWVGYTC^HeS+?m;dmmOD0z72VLE;G^Dw+6enk73RY$o2C#9vR*g-8q-)n~#UK6!!zbxXI=y;YSPp%WRp z>K?bD!=xbWgOI=w--w8su%giE-~bKljZws2g!=hZ3hZ5pY6Wj@9u zg$I0V^e#}pzEW*KivM$*!rZ)Y3ci6J6~e9Q=X0bm z=x)AVnceNS=x@mIFEr+k4sZHZU1jHf%>FR8Vv1QL(4bw3I(HiUd79Jo;^n1;df)n| z($&rmy(^=^#Ggs9C`gua*%{k^m@al;W4U-8GoHsQ>((_RWU8JM#pAaAWh;m>MMGHh zFpYCro6J?w%dk);Wq*NraYqb}YEEZoD%A~1$@^talSD!fr0QbigNdAZMUFp`hC*~Q zaNPwrjqlDL)fK$u{*Y5~35haj>O=p|Wq_*^wN@$Hob$IWE2dG~W8h(bF+mapS%Ds3+oYZ?^io zJjj}QZC0z^4CM0WZUd!%hK#t?zAbB*e?=DW{kmBJ*K7Q_VrFaiLDW=FGF1|@$$Qb{ zLWA%ID@Ctd2X8T-@c@Gs*EiKA@0kz36+7^*UgXrji{7bV&-1E+*>&cUqfe;0w1hZxK1};DF`D_XCwMoPZ$LHC|Z<6Hg8zZhhK(f zT_>jY2DEwT#V_E%ewNwXW(!xWmu~NkT0AGA_+hmQK5u4CzY8z&yfl@hNgE58GEREn z@xz`d4n)*8Cd;eqgqL+Vj)_|@=N{uv$ercQ8?CTIy>vl;Tgq%=8s0wf5{Be`@6+fl z*M0^d+ksAx4AeBVC%hKPlcc-1!GdvY3>h5U?a=e9#x~y^YoKIKCj3r4sC12g3nnFg z&9JG7J9Dfpp80`8s-TDdD$sgf>`?Xo`VZ!v!n`Nd?Is(G*!pdaJUfv6qNS&BMWz8b0c=w2>sd#a8hteKsO+n5kNqdNKwgpL3Qfo~6$6YS0&R zI;0%C8>4aeeH^ZZv18cRVq~|}Sa9A$#=7xpMR_tYg!)thMee=D0KYPFXdEe*xIM}rH_++gRmQW=G z9K{FX7mxtRYv>t^0R#ku_yq(6i8wg)VJHXa-!Kt}0Tk&1L%=2e7eF2f1%IQ^F$FLR zjnhED0cx%e0FV#>Bq{+ClMoO92nq;1_!kg?lmIAz-C$M#4Ss+c0uFT{;*du;c_Lvp zwkY&A|9S;*LAU{+2M@&V|1Jl}Izo{!2pA5~0HbW7j_4gBUM4>Nuu4uHNb%;is9 z4`GdR2P2^Xbie@yfx=zT6|QhAC=!6)9H6JF4$yLf!vBEP{{Zd-{+~EGz$b8+vny0p;V%RoBKl9v28sfR2s{uK6&43TodHk} zh%N9}dwowQ=j5ZgbJ`4fNg%W4iVI8+}ZJ&Scs|bf6tbX}SP(%~}Mk2wUL;`3z35tjSyg_I~T0uR2D;WUfha*tv z5&)W`FTfgsB>Hu&XsH98kT5iTbeQNLts$;RB-&!X6@P%<_h0MZeuhFlpb(AsTcz?HE@Uz)v!{osZz!6aNjf^dY5fv+&)4YQfZOq2LjOsTwBa5Rznx?txt3Da| 
zi4_ftJC|#Wp}+K8HH}N}pP%o|>h4MvI)3%I6 z+Yc{Kyq6qOL$?}{`pH^gEO4(Ftc(k?HUavlKk7x+v&hMBnU?_nrzm@aErT&ZXrM+|g` zzTt&XE?dqxdqB#BWj*IN>pGrKki&YgTZR$^p$ zrJm)70OE%|6l8>al#USNU*D6nY}Spzs(&~!Xq_)9~vomZ2W^U^5GeHvA~d9XsIP0j#yNAl*^oVm;z@%L)ONCvAAj|%EaO!-U%#@wzgQNUKH{0s&2iiE)n&c=^2H}h zNPwU}ku2&+B>(H0v*TSY2{k*s`NZt-PRNQ2z>_S&fXT?6T6vC+64GlTWssVZT(mOu zc2%ra(O(kk?PK~81nsy2^sWSGjq0o{6n!vz8xe+=l%H0mA|l*5BY(WfO9_beE}xuE zb%Ln_3jO$*(@*m*97;2Ei629X$`8}Egt>A_d$yi@bi|FX|IBksjKKLZu=8!~L*hf6 z9CEG%KB_QrAqh-TNl9QxXL`FaYtH4*CW%T~oT?oK_?Z$);x2A~Sl5I4Y|hqP2Dwp* zK6er#{rvhU!?-<$Zhzc2m-vbzV(LqFD;bt!FU@^cVKUaETO2Rg%0T057mLRC4iyS$ zSQF@FCRT3Mh9ghFMrGXVq@O>1}yYv(SG}) za67VZFZbI9F6x}|W-YMi?anMg=vHUT;4a&w(F0M%dy-o$$=mIY1f1Oaaca$ELwfEs z!fZmr)w_G8%R?WTN|th}hH(Z*szSq34eGbmQocIh4~~0MX3`NdZAUM08iPinWr~Ar}k2do(LPlMWcBc>W}y{NNd;ExqRW@;GF$<5g6cztTddMI|bzK3BnOuvi$ z!BRQ0CjcspxbiqVbY*|KfD?1C3_AHFM1;hxBo{{V1Rg4w5n zTz|Rv2L^&OjhX}^(h8^Q7gn?bt{IwQp*RwL96ir{gs-Itbb+52jTX4sF*j8|r>^i3 zBWO($?ftSj6iE{#z7NTgQ9QybS{vbDWflBGLpPIe>%PDn zrAs&#Gj$fmJ>zf6?xQ=d`yF@|Jh3;HYkw?NM!4_X%cNwDouz-8J(SCz?iwVkjwj_A z(RSI8INv60OQ~%x@wAWMcxm*k7^gcm%80qKvVhw=)!EY3>JpK8gKr~B9WX|-+!e}g zJS;8xiEYP-bnU5+LBHcSd|b;?Pmr0Q)b04q(vo(k;Dk>MC67)g>DJ{CVN!~Xtba}h ztRiz9@)d>|Eghe39g*q3fk#xI@HmqQ3OA^8Ci)hnix><7ezIeMx&<=7D&u0ZP@kIb zm`tSHU#l+mjCVb6Ox%9*t#CqKNuls-_^k$r1Ex=erWaw3`s6N4v>gjyRmY@zi}Z%p zvpe^?BJMa{O63yAQm(GmdwQ|G9DmQlB$4i5+C=oA3ULmqs8;@qo#8u zmlsX5Os=e7W<78iY^4LAW8#K8eWZU++Om{N%*3YiKf}t^pjT$WbV%Jh%zv_Z@2=OM zlG2o#{eaIm2tvu(IFEZbDI#`iA~`;p!4w*P2Ao764SdV=9!IjT#~$AfBPwle2vA)U z)Dy`G6>Va=$sx6G3`kg0V1GRp{qib9iUz3&l0o&gZ%+xFS*YVh-WJZrHiRF%ePIcTtyWGaHJ`{lb#A_ZijbxaFo!%<$Xy&XF=R z6)>q?3OW^Aa+FRfi6V}n%ZXYqd?$jhH%SobK9wqIt zpO8bT zm8rGb{Do~-_eyJYdVgWUkkFR~cF{EPC?!7|84GIhZ&%)@XRfwiJUGUL6IOfzTJTxq z=GVhd@s?#_ZFY6z96I8rT~|>zHA7+~HBmM@`rczAF;0HZ?NQ29v+ffSl*fWvB|?P9 z^o5(&bcsgvA3kWDEiQh?OHkA*z$^_Qk9%E39KH~vJ&T)E5`Pq}f@%5io(S{j!?X7v z$0_$FNJQFYHLmY=EPldz|49#1&$fLzEx$6c6yO%!;LN80A7L`dfB3a(rY2zZV&tvH zXB?J*#+ODGKglXpM=J}poH7bUTJylKNyFrJIJ&<)c>O*9>9z^m0`-@fhXA*q{x5UL zamoW9M+?(yJAdL^j$wVG;g~Jz^2Xz4CUmJzU2(2gc~N$T9i8_u*zSefC|#fT{`u~r z_v|KLxIM3m7TE!A`QlH*p`x3pW3536WkH$)&lu@<&ZS(q^9zR$7=-a~$C$b7n{nk% zuA!$OMl*RHBP}_FzA>R9?2GVQpf9L%%$7cyeci&E!k3eMg8iAB_|v{!7(9O0{)9 zU0ELOM}M_!t}T&=HGACsvmKLeI8UKT`5%7%WcT`R5t`N#|L&pTk3wW=)^&K5UeIgoCS=YfXU zx-w$gIuyZ;Uxk0Bf6&E^Ce*kT4Jv4wW|{E92&D_InlQ)Vl4)D{ph>R=qjV2D_%^EkG;CPDc^*f?na=-P2vwOKzK1;R9nj&{75Bx}a0fO|@i64$t0>h|z zrZ4kW(ezH(n(g^Hx(k+>2rA${_%SkX5kC6B+`!4 zqW7uLZMC_LdS2S>9eT0YAcuI9JdLrF!kH>DS>ZPQB<(Dx@~j+<;WtloTgy{A@dK=P zBk*T=86V?bY%TY7jJ+<%8lf(ATq4Y^NQ-^D4e91i1NCh^sWr@hNzS2tSi(e}q| zHcrCM5oZk+aD4iSFA9duO$!az6O2I=XVx1#yvlSn^|>X0V`tVpCs~&GJv&0uv=SF1 zefJTm`!RQ?ct_wNBCBPNJb-Yfg^PPnh1*7Iv(0<)V)R39{Zko5KYxAyno_Q_>!WF- z2#R~vQP1?RT2_|(Q(x!b=^*TteMAoy571-R+gRpXeNxca!9lbW31eGpy~@`PHtfMU%q!ny3FiQU{s|CS@l6)KR10!Hk7viU57Kg zG@5M&{HoXJ9jYwrtm()UnemuOH?ZD9xMsRbcZ}FOKZ%X?6Mu&g#XJo^ZLsem(G;It zKX1rj$3pte>WlB)u$iBMEdCvSpM&cqHN-vyOo#P4ylCH>7?BrpS;KIV81&8YZR!*n zxXKXtdDw52_*1C~1pK~;dN zS+Z~YxM7Fw_IJP6Jt2OI{j7`=L3hYCXfMC($)^fSnRh{7E_y3lcl~fGm{dq>@itX3r`7`Qdwi&w$~eNhHXu7xBdDXt>1%!E;j(o@u`f(_#HRr;jXUnWn& z)rrsLlkSe%W)Qi{oeEW4q00NTp zXe9^{L~e3iU0po~zvGGAYTL9KhE6Z%x0hOuv-fcqQwZji7V0#K&t|jeGDLqpIO$ce zk~bR5B&t^G+E(!zdP7yPzeF&UzltC3e`Xy~!J)0DLtZ)?%#`$;t;b{*GxCy@L4cvl zYkyAGg5emDLg;>ial4O!$dFw2Zl2Y|Mupj$xpV-q?1Z!2b> zf;|r9dnCz1q=I_|h$tGy^Q<+Nw&qVc>>*1$dZXL35?V_2k=5=Eq3>I2Ub1-YcSm-L z{P_VzRmBq*Ms@TrO9yPkA1hN?>KuUFw9Bg>O51RCdn@grZs=afq^;?k{Z)A()>ZJ7@cPC6jnl*Vc&|c)wK{piASNoy|^BW|@-H%fbb!^WZNQ%F7-6R#UWSbT@bcWGKZszivn?ro5 
znfP6bOA48n%qO#yrEg@;E3Wqk4All#&y1j_#G@%=I5FNl$2;(D59%?m{nu`-DYO;C z>#u}+J!eZ$I>4pl8;@uEE%w6CH-E6EbNbwzA6u+sw_Zx}EtSpM*Ler62jo8t;M?iM zE_!1a~FC?aHavaA zQ?#b@Hk6$a+Z!StquxMOC5E%JpUCNJ87rY8*HLK_1$FgTY{6$KQx z5(x;HNdq-8FqffW2Nbu#QV4lA0X3K5CIl0=PKgL|SOYaNG?!5o1r)c};RrxC12r)= zm!V+?6t^xJ2`3u@GdGj5K_(C}FflL+FHB`_XLM*FH8e1nk*xzMf9*Wmaudmt@BWH@ zJSR?|y5Ba46M+E_7%*U)0fyi~*p1PSEO{h(V9vvS`{t#jE_J(&Bs{bGFcI!jb#+x% zW>(%?va-fhvQpMSV*++mWn{vRubed4ag|T-$T(dEYbqaO74U+m(O8_Js~8cARaFXZ z4@P386t4zY+B?2 zVPRR48pi-s3W$J{m9{j`sY*LS3ebigjTz`Ak3CyyPq;f*8SQDzLyuPk>_ylEKpD*| zQe}L=6<$_WC#1-$$}+ROu52Vsp|Q!*n5`TW4dlB>M8ig2e?{sAV5AJNhmXisq}L~4 zti>J>6A=yQ0ZS!O6ik(@LR^AFHY@DG6se5P{FH`pjqpf%hucKhUnXl{p9m-`MW{ts z2F$0qa1R(EFl;nqk_?_2g0x7fPLf7Z3KX7&O?Y~AxK84s1;l~n1s?7}*OBN191X-> zk6k)>z+X9~f3PvsMJcd60QX*MT!tdx9gyiXu-ymXu>)QyPuOY=i-Od0&*8}j01FJo zF&%tGvJ$LB(Zw+bXYFVO%9GQ8YzPTK11RXEDk|6{N`eA7L_2<5`qSifh~bq5%{bnoF(WV_B3!Uf>>&40J|7*o(1+Lg-@M|AUFC(Q4qG-x}AiU%w76M~gvyel7mz*ZOKw&%{r&;nkP@ z=MPSr-n|#Y#g}`tdR!0Z_2BCK^N&*iwr+rus8t$7FbPJ60Vw_d1qF`)EB08&m3a65 ze;0Hzg9s)fNIQ_A*^}$>_~UnG!x;oS2zt}WCJh|e+0AByr5r>M(Y4tJpkvkj`;|BV z1A=CVmK~tCLnFa=NQLtG29y_#6y5*~@oF|bJE`GJKx-czRN}P$wrFcnN1d(isz;5BN58+hX+}e*;xp!4ub^VX0zE4dJPPO z3>(hYV9$KmaJCLIQH@8jMF2<~wX zw*gmmTNjZ!Zp(3FXq+$uZ-tm~n~=lY2Eak()@gc*E$nQ(8wB|DNCA98;5%^Dtu4;d zh9F@ZAV^rY+CZpNVX)?a!h{s|v>ucak_4mzfMgvCM`lCn1oZt9^}I2mh}K>^*{$oJxMXx ziseZ_)ja&kmbh)2fZAn_Kv{~*^W)0d0_=JRgph+rS_4oCcU%>f57DrRdeMovby-ivr zVY>Vsdj&HZr$2PSsf6pqZ?F)W%5KOD)D2f7wz~%QJ{n(&F3} zNb+t0^cHjQ9k}O#Xi*}k?&v3vhflh%Aa^V6eX^9{bnV!Th zjZn3y#$FPF0K${OLL^XKAv31U$klps)1a1Njo2I7f@u&;p=s>2Oa2>T61RFKJZ{B$ z=#k4J7YoXdMgSeXf3N;-|88$>K~f?+ehO5Kv*gaO?+iNcom!MX2ND{v3a3GLBJ>PO zvUV%kEw2c;KuCgXFce0`Up|4b&{m1sfW>Sl2@*ZSc@7;g_xOp;mn+6hv1-%H6c@sA z1P{wGCS0dG|qDnf2vsFA~@GEgo1P7%Jw zQc9ALDwGPvwh}BU1nX$pF*uAI$QtC6vkFhw7_yog!rp-PHCiBZmp7Za+Z=1Lum}!=NGU>0^A}Ek_m&rgmyVPbWbi?3J5VEw+UfcW61*$%^s=Efu9SM(2ztW43aS< zhWTLsp#?on#P`0?!Wo&RDmZkKL7`8ISO+0#h0xeif0`0KahQ2dXXOw_46E_YmV}W6 z_$sWiBOK46I~D{aCATT3n3|}=IzGPXDm{Ru#VZ-4l3PQz7~l2;Dzv!XmeXRw zC0LTxr61j_i*Q$fm$G7R(v2BIj~H8mzbAafC=t z%3GS3h=*S1?y?xsoE7dZtv4dvlERMwR4Bs(3mDH>4-7*(%2yOQY77(y}8b!Z2 zJsMp*Q{BLbWodggn`anO5<}XPe-EU1n!R#U>TYBL%T;74G3MctDPCAW#)~5SqRiRX zVvL^)*nHnDU`Mn$De1NHb5nS36|b8D^43rB7z%U4mT}mr;Y+br_n3 z(Y&VbjRlJCJ;kUozkXQ4#D+w^b`;_e|;SsIp$_gUd!WTp)zlRCL2X-OPDkVOElw%crmXm&)BEez4x52?qMka zq8h42eB2Wc=4bfQ7XtM?@o;$cq#j*-S>P*|J%QWcvwI33K3xpQf1|SplZ$a(0r-ZBg zOq@-}(}_42wICqkv-mvvy%wLR*E4Y;zKD_dZ}F=bi%T&PQ!$;?;!6A`W@0Yp_3!mW z%tznEd_0_g5eu>Se=@6UvG_U_*W!2aRebv=Q-3f5oE5m0^0oH&*!k?}_1lxT%ue0I zPJ`@J`0c`UH|&(@JM4S}3_U1VdbGyWw=K4wO)oEp1#|1kIdTel++y>WKd!z2H%o4X zePvi4u@Wv&+}+)a6#H;@D^lFGz=yjO`*4@y?(XhRk>c*|?(UbKd!BQD-X}jcnY^3X z>}DrBv&oEtME5DxNY(KY(0zUD6^RcA6!H=BFSLEbDBV}D2l?GdB&2@;2r}Qam?ifA_FwRZSkv^?2Au!vz)v~{# z@haxjBC~t8=pB#n*ygSValXJv3KC_a+V+l>YdT0kIZLL8mL{}e^1Y|Qsc2QsAS9UgMY6^Ent*0vIQznQ)TPAV zSwiIUo3Fv*pOSt1=4&-c3go+0#&&%IiN~+Uk&_wG{N`W-5K&@~{t53?=%npefFKgueZf$TK_WqFYGc=eK7R z9^um3PkwnbAjyjdebzYlbl-&~JV+<#tHG4r4-#f`ddJ7@%gsC4a_jW=i!m5w=jdtS zzy$Hv{oYvFY{|C%z+}mL+6g;{AI}~8i5UZS9BJNr=QOyc`d8*$7+IA)Yd)5QW8(wt zT)nGp?ZiwH>N|qhj7LV}6pP$Z)LD{oQ`;zq-HYw}AQJs-g1v*Cqy9rHt^o~3R~j%r z*CIB1?o_b9a~v3U$FRO=Y0=*#cebp~e5&u;<-~4IcgNBOorF4RCIfeRvg+{VY+cVc z-E=^y?(77*^Nj9|wX>2m!O{8xH15x}bTE*HS1{~sVqQ4ciH+nydTr0!*R`}J8ZpJJ zix??XQ0H~uhFg#m4&+k>?DDXUfcM?~-jpTjVSN{1TMf;>qbyZul{89EVv10->od0L zeq9L$Lj`pop5>Kw+x#!>!kJJd77pEAje1UWX9xNjkL%~%-7D}L%?<1Z+IO^9qN^p( z;EBV`-(6K<@ZYyWZFYe>@jmP)d_S}`+c*aYL6Rse)!h7NV<)~;&|QB~ec%EcZe%L# zA#K$Or?fV_)IM6{+TLn2S0*MTC~kQdl2-L21`eE$jnei(Ejz29x`_Jk9s 
zVU&41qHxEv+-f5!ryAP$cS}AY!sz*(ZXZB+NB#K`u_6kA7YDclI8T%eK!vu+1D0Toppy@vs4B zXuz(sV^Y***!UMqRjZ5L`;{hrR_8{Avol?-JpH8Gwtt9qa|%>d7~jQcpEY9NdEWT( zcOp2I=>%9Qec~QuLSV=JA?aqLaQ6l2mkJy#B1j(ekoSBAsA*&ggAVfbB|(rg-XuM^ zAO7F6(0BzGO@8;JX+2Do2^iasuM^KJJlsO&AHHaT1zPBTv=H|^L z)#bgb@UAZF8;#dBJe0M3-YLBa+6xlI*iv{u;Jh`ldH)IT{{b!|VyX-~G7-TIBFKg( z!J;>xW2t!x*Ee`B?15Z8|tU50;78wIOx{hRXukuNOkOvQ~*Oi4oIp_XKiH z$0~E&1p@rqjsm^HWR$ODui<-}iq*7ts9`W@gQ6)lQd=Q9HN~Ek!eI17no!}RlA+x( zq>_JYA{9 zd|;npF|MYC_MXzJa#I|GLtRHrg)V0;3_f?65tTH82TMJ0Dua%Ml$?ZaF=7&w=7()g zL_0~93n`uuE9|wS;UL0A8#ApzV^vw90w@OO|B65B!`c9)IHKb_j&7q=;0uA+Taq6% zMMvug)j&tF>1gRw-Mt^62;P)>C5O&|qe!!J&DIcOjxlZxKWPvYz$xM1V%`sA&bkR4LksqwGLQ%1732#4POa(Q zcgRw<`bL{RWl&iR73UET=2m({%SGB`+u73bwkP=EDY)+adh{t2o0={@u2!nDqMiOc zL|m#`{ zZZPiPJ@NKC@x7Q)|C>u(!EK%*&s9RJ9Pre|aANjShV4PU|DCc?(ziy=1^v zFzdj`!#%X+her?S^u}FInLVcSv3X!0p{>JjeW&$C7bV*gnL1za?fxkVrHiAQM8Na) z_Gz+iR`4~zIbZKFrARM^MBw8(s?F=dRpTD*Ch?_%M6mPY!6bx~QVD@Lio(@L` z>j#df%49&fY$U%1{g7|s{|I?{QmzjJwoT-Z`c9k(8Kf0jEYvgH=A3UejdUX0d9LtX z&W7#EBmb`3FqJp0U}(1S6<{t%QhWVFrn1?41uRUNh1R-XhbWJ&;1wB;NBmu{V=&w}F_G90>GRrmhz^&FLLj!F##!&zR9D&9)9UbBrY3(cTq zSYkYde?n~SEH||k#zCMPEs2ju7rkSj@GB~~Bkb-89?Yi6XR(~oV+Fe+~40oArClaTuGdEubuSh7~c)CCQGchyjy zD>~lepSa&J3Y5)EGH35tv)aW&TD4d|ch5?@539Cq_sQx=W>Fnmy!VIVEH{k&nZ=9g zSku$6ZK84CW+SgeQFy*gc5r@Nx--=L#9bd7R4dD^_zV9r{u^?7c5TP(o@@4b??mm> z92CRVEuN}Oaw@b8xv1+?2^{cx=S6z$fNEXoRL!(oLpj`ee`GM5fSC;L?`VRccAniW zxqG@BKpkJ_P;3~x}52uo{!*I9Ec8@4;%FGi#Hy7po zp?I@$NxXUI7V(>(Q+&y+Ro_$A;<+zi_i9RBi?>wwbVW}V0qImuC;LOKRg`AK3F5VB z>^AJw_jA%4z;d-|?KB9% zOP6Y#!u!9>-?6=S9m-&o{&&J>|D5n?k=g$@-?OnX|7SzvSCH0-jm{w2+712rG@?P< zMszo`fuDCzetH+zyd`I7XhuvF z#7c;tO|h6THA5p;!GetWn{6Hiu#5N>81fe;*J=EjAnfliGdK>feI7-zZ({Jl`{K(% z;oE zzV`*@WEg4w(Xi}1&F~>Jdp$Ti!qjr5n6Pgu!J9&Ew?U*reus?aKb9npF(Kpy(F}Gr zqou;Ie~OX7JpZAeMx_8o2`Kv%OcQ5{Fk&*iiBaN)Pl2goSoy+UGcXIwg7*qP#4jS) z$2bhwfH2)kqtx@s(mtK~{?3q8LUm1nip)Xgc8C{q9*2GpiPs^|rK}9E5yF?|N~?kS zp~54r9|GBwKb)5m5S4OI3NshLw=Ycv7ZYz<>`S4Lh)!u3y$hX-2^)Km4jB*- zaUHaP`u)rOq2%~nubP8VagF)7LBg;G&&BDJVye z7ezZ4p0Q8*8zdi+#?FK<&nNThCE-=EE?!VuBj%mk$0KLYtQA#odjmPzbjF_LS@YF`xVJTfs@LpQ$!t_E zjYhJ?3DmGemUS6bLQ>%kRmqxl^+lkF7C@zbOs!Ss4TXOCs+S&gT;P;4SbmCR=VhDf zSg#U+zv9eg8MT^9H>b}na{Lxl?8@NBk2(kDcw!S=wVy{-b9|Hf#z-Z%+bPFwIqJ() z)MQixRn)@gD$BL&>J~#P4H_y9)Zo4^J5pgP{zy>;-?`U;weqUecB}og~J7k**hCZY?o_vmaZ%Bwi~c)d1{lJ@NB?3&`yYKH@N?+nXWh+O8*K%kG>RoDF*-C$^R=Yfic?wIrs{Kfi> zUoh+PmJXH|YxR1cBSPe)?LW;P70!u^*B#cHH}^&YJGCx!wJ!>9;CpER4tpZIjs7xw z%Ua*G?-8S%R{*DSEn2+igehv1i;qAkI!4)6At z*TI{uBTw0{J>bY$9Z!2#SFe|D0^WwU_WII!Pb3FB{Mv;y^FQsIc9Pi3`s)gGu{H;9 z(zCwi&b%G9FL`D@++c%9S177%`KcRs2V$3$@MR5Utr-$_QhM8Ye*|p2eq5f<-3%VI zb!7>3@(W}zHeMT7ndgi%x0ZeLx#?4%P#{?+$z0E11lwuLcRsNt*&reLzV~Kj4Dxz> zyB*4JijF3+De(;qClfmsU0>edTlrBH9z%ls!M{pEEbmH!{2&O*T=#N)ynJ8Vg8N6-+#nz2e{@%N*v9j2-%1qBf4h(N@06i!ljh*@z)pu1=C{qy6?)p=js2I2Vy3CVf` zr6zV+{S@f=%q{p$8|tEwm%BO_d=(9N6obvSOD@BzpS=s(8q zL|olIAn!HrCM~i;OLNHNrm>2$)?DPv>L#_)K@h>@5ZEvdt2Zo%oe$+t)es zAS*j13suXX_UxuNOj{eKt!7##VZa@CYolZTsS}&$LERdpkA!fLlTBR&KO$^H^+`V% zrfove!3v0v`V}{{oy?iS5EY8650S*|Yml+%Kh$p0GN*9^ML|5M4||@{M28$^wqB|O zNA7p@8G7YyA(ZI%dL2Sf0gP=AyuH`YG}r(|)N6GlWL<@$fKgr}-MPvCVWkD_=+tbWYc!ZE~P-8U9L!dY~9|lrV&aaHGtiM5?W|F9r`yO4Xx~$za^bW)aYMS zRg!THr|kRpD$(1z(G$bvdZPXo!n%lYh*wk8e%(S|nodBqpErpZxud+Zp@F;I#TJL) zCwE0ZL~~7mbT9P*cehB>hhyx;CDH_G*-)0al1Zm5AOH`k2O4WBonR>9dv8PxMuHCN z0j49=9T5e4)Uh5oF%kEcV)R3qa@OZ2PRbOwBQbIne^WxyX?Pq@HG`9q2&B#%3P#03 zCB;62@`WAh=Dy=l%d83^2)@aU*lsm-O{JOYxho&t74~jmE^_>lF;GqufD!}Ab#bJ~ zky<>fPFoFIS;XaIavVW5F%XsB@~D9SP^*WicZzYw64N9DndJ5zReV=L&*Zw7=cJWQ zEA8M=O(|ZjRZ1yl;P_fB{(vSqpJ^xB5e7yGNO35t~ea+ 
zb7^@}Ugku4Ti+x7dN$2w2dbnu^Qn<-C7)(P5)nTjF(GJjwjd@QpKOG>;rglXspAp_Bt@+ zag}8%mr!8Xzgf^K9;qzGyW7~%8fA;@%0iHrS^04Yr9Zr`AMNr&{nD47mE~dNlYb{X zz2P#ZCj_U7cD zAFGwSVqZCfWMltr>)X?M_8-d%&JZRt9-Ef+gJpu`%y+K5_IqnkGz5#1ben+j<)e#pfkGx z_SxrUg1xlNF6?Ig)-{*Ax$c(YrQaT4-JYjD2YEk@&mti_FMyXhO&xVTG#mS6VUns> zx9I(B{ue|we4xg+#&4g&##;-HkSK1qJ@Q~XT>Dsq^{Rq0KmCsAzWGhYoU-48Z5}JO z^(mUzThAorHH+1EC`WBm8)gV-bsETw+;unn96T=bP5Lp1*$N9rmT+GEN3J-WB(IbA zm?hOcuQ?IMk@21d%=rnyZ_tF_$yfpREC_kdn3JQJxD8Yzd^gKYm!QhT6r!Wt(J6K} z+#5gn1%Z+yMzj@AyH1@ygIn2|^ z@}lkdQ4Wec*E;-Ba=r7}0mrnfnlG7U6$&|=@NOly5?kDD>$G}+mNuqL_J#4606kqV zcDrvTolYVMc5XpuRj)R{?5UzGr(B>EzZ2{iER{L7?yh2AVbdH*jIu%gUajN4_GLi z&rh$ZjGP;io=Zo0AOPJg)FOoLy1E}hijZx~KVtWq+AFgQ1`Qg1KZqVzbJhQdSetRP zike>&)n7@Bwk@5Raiv~2^a&<~4FU^E`jB(+|FizbvggYp@gk&EE$Swz{{1Byv6v$Q z1OPl0Om2}F+}~)*4;Pu-5eAy|ON#HP29dvC%OPnhA;;SqS2-wA|5$?=9PlL-d~8t{ zOe$IT$;QdLZtRAUxaHol4D9(|@*#rW3|Q*#B2eJ%0NXep0Km;5%^@wZd_tL0 zI`4jaO*aANldj>X;XYF+A;&f!055XwEVoM6ki+QfH0{4MU+#36^t{yY^$_d!`Dx7) zw$kHL(GAwZ<-L3(zwoGvfk^c3*kmnj--FX7d`@)kRYw15b+>R=_2%Z64&sP)Xk)?1 zxxIUo%S(E+PWk!EzWYobKatMEh^;d5pTw<4_-aJGuK%k(e@2$OHGm@rakJ~fGbx+9 zn~<`wbAKw+>ZIB%q->-tpHwAV+s`4v&do~7OsWLWByD4A`>)9M-=Y|)HXoO`C@ZVD zhzPqlKte)9m|K*Uja^hsT$q`eS%O)di(P>9{{un&#C)>)ZenBR1SDnS{_mP-suV9P zB6H_~gVE7DNr6*J`P07fVEvZ?dn=m89WHx^jKxrlL3@X|#Tp_S7sA1>2y17hlY*aX z%9F5Lfu*??O;!jhL}KjdvJIp{2&v*Bj8Z@-qI^aR*}00t!P2@l#$uh&4!T@ecTn`l z(-Z#$?}Ph3|3nJ!0nx-wyZ#F(^T1>O*I0v~x6*}2;4x_`IEgj=vVAXD0B~h83JqI` zXxXM7yw`owXMge;j-Us-o(<>~pxwh!?+BEHYx1YAM*wyNj2XZz6E36mW*FcsqidlU zA}mHWM!6*0p#bkXvJZ2x;8wcz0HP<of0PD66Ox zm3NnOMx`PWJSj2d^z>&Fa`|h$S}&R%`WZuNq77!c2z|~OH6PNz(09RnEeDLZ` z086KI$>MF{je(c=%~38Sa@dUi1VBq5QUGTUV?7dJ$pd+Gq5iLA8AUr1_9O%#VSrpi zMAr3kJ@wevS!sTl?^=`Ul7eKOMsL&8{& zjWF%U=@jlVI*80>g7zL(S^cl1M8Qu?4}fo@kW_*RriT+$q>iZyP@+aMxtdi$%GnIl zBLI&6ku-0SEsVd{Ep~kDe^COV+!!h`T;t{>(9IpP9w|z*KScY{p@#%nonNxXzGPb%jb)nEqpbfN8czPS&6K-&c% zeq@@{9$${UC$yYp0jMNAQ+%d10>h%GGVe@i<_Kg_W1C$fHI7dAqiIrwq*aCL7x5YC zGHwoHtxuuWBz1bE$%s8QG%?1FwqnFDWxCHJK0uJYXyxC*4FfmIDT z0los&;#vn8p*}h~B2E=lPn08CH1-Ya%gOM$N#-R&6ntRFxh0Fda45d=fC^s`qSe4g zKbPN%AH^Y!VTD!G(%VQlxo0%9P#}jaCoRE@Anaf&dsZFZ9PC7oqPP|(I2rf{=^}z1 zJrOD=*hu}Eux}t-yAb-k2J|Sf#F?vlm8t1S2=7<=HoOdI<4r_dPTLbbVkCVogv*~1 zoJQXai|0w3m_Hk{Q{*5F5(!f}>OxV0Z6Wn45YIheT=1jq;|EUS9hwH-d##;sZ6a>sL`#tCyU|#!o zpPz*%9L--6dvxtk%6(MrNXmU-42b?>6KtaK2e4=XE_T~2A}yMxvp^k%XaZ8R&|Cgu zkx{nmvyy<3D#cL>vF|1QtFlI!@Pw{G8X4$~Qut%^oQD_y9F{u*M{hVa>De=b1TlVO zXzA2Z?dK=~;WV@x+)ZMzaqb>&xI$^Iu@#Led)%)*0&wHf%VQqfFR9rdA)0-V6jX~e z+`x5ZCi6s?&g45;W{}df^*S4<1a3JdxnKTL)z1t4HTKq*_$HT%Xi!ejGy`Ka!JsmK zqy1nI5RQRCn7COD8BCQ-j|aGzFm;9vlC9SCYl&6f_u7}1+iZn&q@ubJ_$UW>6BrW@ z81cm$)cx#juWAHhe^9wGrm$Y5iB7AmX^{4~hYAGUp_eBafI1)-s2Y|(#s_cV(vvs4 zK3m`%eL1hDwz*PuW9^B8)03@tLfC&>Gjsk+-VF*({JGT5xt<4CK!8E5!PH{Ao)(_! 
zCRCNP@A0Rd#8$7xM%L0)-!mfkfm~s$zq?)ky;FpEHk>aIr^(N~u9r#!(j(su5_Pf<)?686?{RebL3ATfJvfR4%tayuJ2UN=kUiNvac)H>1n^!}QX~6P zWRIMlauzuIdrw+&JRz9Z4(m!-Xu*t{c2$KM3w{1npB1hP45JR}F=sYvybl0)FB|b> zZ*w!7-Y@PqrGsM#b?pACqmfNjY%A%sA3iM!o}SeL>3vZj6?lyTG{kWBIMaxn0B$2svQ{MbGs)!D&pnQn~>QV z%rT1iYtXs&Dv6Lvdd1z48aJ1Y%vogMsFdakhMpQXnU1(0|5N@THEtxG^58S4<~R*X zF5{$1YT;30%_u|p%(N;Tv#L)>w>qxIUjRNhs}9TUbmas0t@l61%?e8JREvJ~z82{x zaX`>+d(OT_A0{T2UG+h`OxT`xbvL27pv5Zbr_#x%w+q(AmR~2qPk1d2Up$) znQCI-QVzmAMLaQ?PG8DKK{;?ZUxQBeK}$1`F9O_YJlug96Dc1u8PqmkYO|CVpJJaj zp6jV+FFq}QlDk~w`YeJoNKAuhEezTF-_6T^?T}>6oKhfUt{3bxfZcU-epJZGt!YqX zN7Hd8f5f3!_^FkBCs8ewrduGqI=2Pp;+;Y|$?Wm_ycd7JQMO_ENW$Q$j08E(FU)`^ zp!gyq^m2?5gCRTl%Off*p)@PSV z21oeJ7$ByDAM|`hc>WLxrhS2uh%E2yE$3+8_eb#z6IGh%y|1zqb65JtvzMma9#eWTfL79ac1Bvg`KRg- zLmPZaA&udYLLZ-3h@~vXD99-ZYQjqogHgzfF-3lEtn|*hfponHB)aUQb>t<4Y+cHO;j#DQ7?m%c_=_6_QT66t6T7dMl1^+U z#2@T-cMXhY%6?LFxb()&hB1ZBB+Rb-9qd=HHh^_Fu;Nm77+Xo}W&{NxwV&y$w$xWs zDJJd93zBI_Vi^`W&@lCnFp6nQ?AM3z)PAY23GAe1&GSiB4U6y&b-_Mza~)k7x!@RoicbZT?Z#OlH_F9qRZi zqbweOq9T8)n{9P3u;;A%WFZfJ@tq-y#6~trH3?p-@j6Xq05N!}r*a_N!MWJ4enTiC zUV!WfvV>aYB+T%qc&Q{ht+CsWh%$y5J3?O+bs@qYf6Jj>WDV!OO6H`lL~zn-zU55a zLyC#hUu1P$!>)X9pmrPEs%MLQ;v|`PL+L^HjkK#scj)N zPQWee<03azpdX;C(Sl;nT%<{4td=Qv!64~#qd12?2Gj_RLTh|}8{Ll?-61TJSMEJT zYQEiogUawE^NiKpzFT6EH1$e)MM76x_CJ^deg<-8EdUqcyWVvp)OOHFU^llAr=_IO-`OEcgkH#|f_Kx@&3M-9n zk0~c#_~i43gY>cJ)$atN8TaT4E4Q~x*AI@Mh_wS$5p_&pYD;n4P7737>{Ku8evNID?P1=|%)f=4~)nB;K+dFOp`l%X4+2>x}|42=hMw5GLkSEZ&V)wS2%-*Vlf78=&z zXmJpn-;bUQpu+va`992SQD%NyUwX(;LWHNNrgx=1H30On_Su2xCct(SS zJ0d)#lPC55Pl_oB;G#lBqw~njQ2db8N-_YNe@UD?NVd>#P!Fh#ItYALkQq`9nCu%v zh#84ll+IoeIbhduy5D%I-+`1Dg~u>pYyQE*h=nDSd+|ZTnVcHZ1yw;GZW{7k26hU< zNP0|Fg^-#NoRJz&7Q!bs{cj+x5?;2yO)V;gkx?sWY?V4@WcV@cC_flnE2-}&)2gOG zFC1%Zz$2vs82SVqaTuYzAW|F2ZY+X`8f2)NYA=PVDiDB>s1OJ_5S%3l1y@w>l4`+* zx`|Pbme3(_Kj?&K$VRWMN`I!)(Th6{wcMANLj%zMllhk-X< zP@Y{Rq*gA(7S>S`ckWB&ygk^Tvj-Z?3f}neNay3w+@*?}hX)evZ6z5@YcElD-?

xm=?K@arxqqRXjo9X6v&k?^tjQ);#ULM1V0 z)2vsGt(bO%S>UtZ8391UcuA~QZ_?9_lYg-Q4!w)e-{a%71GLMeG~;d@=5(udi*cDt zB&!PTDfPnJ9sib*sxzm=Ig&T|cfZLkmE^+p6D;C7+bQ!vqihVb$%b#~rt*%*i&$KE z%EQN_j$xE}5gk+`KRB|?m1&7L+jBLsevRw4O)FzB9Jskuw-32^W*;fP?@4Q!wH2%a zNs9HYeL$o{7kya^T^8>m0o6n-ggD@@yrP({Ll- zCV^71qhh9%*1jpAu5s1Jx15Hre7nG@K0oMwbkZMoaLu9pl8Z(?LfJbkcwjkWdE&I} zcsKktymAeFk6DAeOS&?Tg$<>NRm7J?|>>N{1e^ibGZuqppOuuq<*WA+IN5|1S&K6YBAsY#qxbHvllq_N$O~A>3zO#Nc$fX$Jn7t zU3s^31+r?9!);&vc}$Ek#oj|!Zyto6yKlNwTpli;`gb|!Uk*loBSY_-Wnmd}ylVrI zD|EN@OV6a~=Xq0DjQvwpq2RSnrgg-dRlH{(b?e6$gx_iQ?FR{5zVjq@`C@bVB^O5p z756(w(K{a4A@x;ecEV5JI~KeD!2d>6r|hwUq9-Ina)Pljr#NPV{Z}&8f7=&GqI7+z zFFJ~^lh5vGC>ENSbEJf;t%tiYd6T$N4ZsD3IP!mgb@xLcpd-i@a_hPbc&k1|oQp#Q zf}sVF30ViGK!K6Ne)ISY}AG!krMNrkYX1~0}H1}`GM5{Q=_m}_l6RRs}4h%NOhOcuG(aM#ctJ zF&wcFrMl%s%}Bt<-XfU?!i8JWtYAZ2VcQ_W$TXQC0SfqH@J)mvh!F}BTvjwR5tsom zr~JRk@UTlL424H3+EY-m1kB_T7pd^$efF>-bvqvTut}f|BD}cNoT1E`5^m-?p@bwg`sArN`va>vm#akdZdyMA@^Hd8ZB zJ~O@4#@CW_c5ZcdX6^XEbpB{xc?hZf@=rJYQLGCTfXISh}#A;t!QY#8h|6N4CuR8Gv^WrV*YOv7OzstU+ zo0O4G2mGc>AH4AkpvEWmqVCj3`$yDg-TMP`|0*WKer3lJeV`1pL$hG#esH2y`?~XD zX^*Fw^fS;8*1GI4I=IKh7naA{vny6QzMysyKcoMM2>57`5BVI14+n;CoB&xy z2-Az0_z|-FnxFFxeDQ8kgq1kH*B|qw5)odFb-=!UO~vfbISpZ<7u`8|hS<}xv!0jA z(Z#ZRs)vysz8!J2o=8Q}zoyV*uc~6xwE5ZMk$G!YhN;JB&LDc;-)ioS0im2h=BKJR z?2XDEh#HIb7S=u5m}6K~wO5w<>1~cT`Fi@+IbUrpd*^@b;hMPv{`PCCnc^&PGVcFn zx&tCqZ06%6#wMwA6h(|q`qfkr?Xi^JQLn_ z%E#qc7qt;t)lAwk3uorD1HF6_j82vQ7)FTbpDRwvw7lVdd36zv>m@5?z)`SOk20{3yxZf3e%t^d`*ak73%mP2GF7Hd~j<#03V= zzUl@Xd^%H`Iaozr`2O+9?}P~8-~P-oL3fkc%8WwN22bAKV)UGhcJ@m(lp5Kx?E%$$ z*LP)jG~~_nULTYP#Rd->G`#&8O0&DATQB#>e6N{xk1L_*`%j3* zg6iws^Sqbufv?fZ-GMHun!;t_qLZ1zZ50|_5!MQ*mCYGAzgnb;2mRhF;P}zCgK&&- z`ftWXjyWL?2_2LrC2kl39h`@iErA)H1-MY(U5sa=jKUUTE%})+xFEM6M_0s0&eKay zo`*?9LOt>QqHB;>woi?16rg)~urVYd?iL*Pf%!50?$yxlJHmk)WyK&}zlO#Lxg?A- zCB6P9-R9>#%g653XTc2on5Y}zk|pN&Ynr+;hG#}c@7AWy8ik6@`{Ue?bK(1`25`Ul zd^pDbql{Wzs-iW>t+OC5qcj>{E^d(d_hQ5}Cz941YDC!F9b<)y5}*HPjtDcvz}$OH3s4Pwe{n?((s*>hIwf`0+5YV8Q)SkJ z?AZbF{E&m|$F=InlGfL1I9hzoL_cR)*2^X@NZ?HA+aN@C?r9=>rc+6wFPy6#L#NGt#@!fS`s0+EU(E&RrGl^a-F5?Di9Qe0YMtb<6W9& zFNu%JdV>9K(YRDpv0jpYeX|l_A;T#fXthF(or80n;EWBXMK*2%ALrfkGY5?^^f@j1f7n+?Z|^HYV9`|ws} zc61;&#o_yfa2}V~stg?}2PKVuXiP z3Y9H#T$#`rL76}mb%c1e#IkuC`rOg~*w}w5-|AP!_Hg9EC)3aEKtO7@lDMUTp?Z2g z5S@f)*`G9*bJ}k0g+z>|9{S)w)StGSQUla@aV=;ojZI4kylPGRwUu)#dH^P3ahgsu zlPEB8MGFH7d|C;zvj3TP`H{}evqRHY(-8$1)D6_DOa{`2+CtZQ{<$J12lq@6cCXCF z$|<6l@4r@yl}U&{7I-?3dCZB0pzerwCD+ZR>hsqL=q`$9vcw*}1On9>5u#)`;Rh<*>pMmg$oO&a; zHik|Us!niy#i<%pcxAHY_E!K?7nuvc zdYcgcgys;wTm0{>Vt{g|+_8b7gK@AYfa1^qOFA;n2b^f#AL_S~s$v@K+mcqu7mL>_ z&75h{e_zT2!-YbJ@-6kI$p&KWG^|rAQ8kpI#bG;&8Iu`q)7k$dfLeICN(H_oc zO+BvWvMcwu9Kh$cc)VifQ~{qC8OPOA|6gK?rs}RLK1|ui+S|-1jJwxruLxbLN?28{ z7M+8ANX1NBPs!Tl-n8c79%n_esq@8}(RQZFRc(jO6EGOHf8zYEk*EOyI|~j_qgMH5 zN@aJ(n6H)7VsV|KPi@bBNg!^8J zMsm8BMQ+eA%;>YXD2W*To|4?O@u-8yR2Zb`mZ=%A#GZV2DRC&$g*(`}%17-Y?cYFt zX#L^B5GGk&y;h|k8O$Z|{x&B2lAy>_5xXP898{1`>~6nIy}HdK?ym>jJg-J6(`z=s z4$v=*^jb|E*yzh-^kuf5tkz`T{|0JcRLmrlvFuM8&RFaK>C_AEL_{b2hCF$!(yw_k zu{ZCxAC`_?2<=GdP}9#c+J^If8XUzro}`Csg5lGSXVg`Mx0O1F{8>PG#WAtgls;zF zZv&)FQ>EeeYo*`@p>_a+?90?&0gqxcxbJ)L-C;uJl76<{bk;Ir^y%KtNnh?Jx#(qy zx2Ar7`J=9^PrLJ)(;B{3IdHCQ*L6vK_#?YPlx^XtDRVR~Tcm6>r@&*HKvAWtQBKnp z7QQX|*U{BG=UNpu-~4^cMmnU+=M}r<_ksy(v;l6mmShn&*ZDXtN1@shjA(*xa+gnz7F!MJ= zzrW!XP5@uA?lET*bvf@;XL>G4HZOogg*4l#yN_p}oY%Q+EmTZQ6W2UaFqWXPA_-;Hg(q=|lJjR{ddV?f$ntorP7MiT@9yK!V{xV-)n}Xx4!H*2M$n zzSzApEeuI#a!P!httRC?PL>{k>%^&_u5;R8EY^Lj0l($=)Nh;8YNfQec#AJj^1#lAn6Z@^42> z?FC!@<^~X9SIv{%V!Z7zgRqI|WjB)jwW}=%5elwPc5nj<>u-v;>~r>$7_6;+)4@~< 
zT$mebEB!_Q%9NU}Ihc5cEn)~t6Gr~)iZE0Y0*461ie7$CYk7l-g%v5nC>a`cK8T7- zRPs;04hU&yg*TB53srrhtHu4*73IpSRoSo_<~Fc{0j3bFykyU`s&U+hx&14qpEr|w z`bkl*S=7)NPo&L2^sf5azOT6az(L#ChzNXb)FGtKx1DPe145ZPmip^eQbnrLF;q+lGh#n9E3h19oUgpuRe&&vmSzb)msZ6e1ikQ8#?cic7pLmL z0R*6CsmI~jVZ|)M$)h$2E5Rs$8TVybUnYqAysj3@tiX2T$xYe}HJ>@+m9-GSEMd_? zpc!}(9-LDss+BNIr$#)$VVM8f2}vbwLG+R`L-WjGRz@t!mI&8Lv4a>*@m02d%J$GCamxV%J|HlOa&(D95f z4UcFq!1fmFJr=euXhoi3iZ!By39clUn17BTIW&7svNT7SHj@ojHo{!ETFjpXuLPWP z^H0R=HF%R0F|e>oCp+aQ1}6|>3VFkq*W6z~}(G=}X`Gcz6834*slSM#|?I(Q%Iu}D5__^3fHw-t!RuNJO%X^z;wTSq4k z)u}~oJ**&DNwxcO9Eo)88n_2D76GAIcT@gZH;#AT&cCoF7LndHSehRkYMUwLbn>Zf z7iZ50eBl{1Fs3wE*l%c~*IX)>HEwL7TSsVoT$s~)8QUAZP7mUKLmy~FdL~+3H#%`& z8fBi7dUYBFkZGvjSQmuG`}2ONmGD(>pcHfn2>vI}pB9?D`LoOYCNm?|_neeJ3P@%A)Zvt?OfT;@gBYNZg zfw4cfPMt<1))(g~5V*{FW8cW1kkb_HwiDM~J@Xv{29>U&v|)Z@wbMMQtB4huL7+2j zKtze;Qv^D?H~ZLM7w1@40SQ`erRbl}+h>o3Gd_Q9z1rZD%Mvm2B(Mr*GT^e>T`|Xt zq)fs^Da~F$-5UoU^9Yb>p%0m2O!+}K7reE3vsbtcDNh}}p*UgSY~m@n{RyeVpZ0Z~ zEVw-4R@A6PHts_iFu|Dm29=j^=FGW~ys;TL4{E|UH~OcgGL$0q(f#9QzRg;o)^uo$3D{||bNVRM1z?eq=!Ac7 zuEvAHI|xUh&G7NQ#CIkR<0*-IqTe9{Qb)O+P2=&2ay$o$^-g-@xiGfdW@7v_H&OYM z)KZ>s@He+n5(3bT!2lx-lG6Qx7QtLhJs1Q`LKR)az?5H9Liw~Yy2E^eZGf;|f zIGCFrkXhdd!UOMVVaFzdW3ZMf#e(q5xJYWh4R6}JK z0&9e4)#wgih+JG=LQ_l9Cmbtk%h3T|96IF)YZy7p|6tb~+W_ov6t9dN@~9AOC2TMv z4~BvYGm_XpAK}I;Vb6&a7S@5%1Nf8QSoLdoIEV~}A~GyC z%Ewz~p`^p?jhU68T}dj|7D8=G$DlamziNO$Oi6|bVTT3h;K|w#-I9RL5A>BS#t>MD@+(Sea`yWE#6~4E!h1N`#x{5+p^)fsQj8apUN1LCt4r)rx77BMWm$CvGsoO z-1I|wiKATU^rhVPTVaZak+Hm5SO1F_H0NgTAGmw^^K$SaQu|HHQwE-Kt&5s%`SbAk zzjuCRn-K42{%8Cg^NTXaEdZz9toEV0t+jIf>A8Xyt46Vf?rE>wUZ^L}F|(Uv9jUcP zPZc{Q!%GR9%jHd_<>a@wqV0^8abm^Ypjk z8CDa*umyg}T4r|7e5$2OxfO?tye!&ZEPYq{RfXO&>WB0g^2-^>Aq$@*zkzJdJRjdr z{mrv(o>BQj2r*Mdb-hYcG5)uRHQP`%ji2-f(VU2k`^aNrkv7{4_GfifbH{`kbl9d! zj+0Fv?2B6YRs!ch6HtP*(XQpXqnA~)p&RC3zTRL@#uxcaR8jFH)}(lGRo-2Bx1;v% zPL>s!d1KI=pxUm4vsd1hiq^d6W!$;&;nSt-{#4bOV*-r!TCS=qIPq||?g-!UjpaqTeJv^`?8AC`_y{4n%V#c{ho_%T zn199<&X97|{60PGM3!?Dv?<|F>cD3-gMNv`5VB zV~1h$$@59^(weuWyNZ1UffA50aM?^A-MeprC-DEj;n;T&I06|1Pl(OS=%I7ly8Ay4 zY&bn?kG$QGKy0W^XvT{tUHX_8{r2zom#62R$Ropr>hoszzkIsiwr?B2{Z@Eyy0RlB z=UD=Y$q}^?ha&K}ze*tNfXxeZmOAD>N_|#W3V8OYctP%&L=g{MT~gj z+OKjrsdXF`DK0T6#k@+I|HblmqiC_Klzc@8&5{Q}`Oo^8&a|HQ#DUK7EJYyv{x(Hs z9y2mG&(#1oW_keJ0m=w~Mk5DZ>58DP_6LpO3FU*$)Z*w8IjtEE#(j&KK<7@&G6ep zQ^)qatbQcdf&}VqEjdEj)O8~ z_@9$))cep*Gfqb1J;}U=t%!8I7DBDU$CV<|b=8eE9K}=_g>K*l^#xX#u-t z;97g%zi4$ZU!ZbbmlhHuwLVs1;4wUr;gHc3&K?)*?J1lX*8?qzKxP_0gFb3lnN>cE zz?KoqB1$bXW(;Vf1=5R-6qqA9lDYvV&4inWm~9TulyYbzQ>gqHQP08jxgh~pb5Bjd?G z@R3h=8inrtgYoyGPWu8BOFh$z5vS0mM~MURlFe=OOn|t6Vdl6Qgt^ODF^I9|z)S=a z@Ck0IweVoG+f)%+)Q_(T%QvGlr9nrla$!FxVbF?tAK2l!Wg{ZzxVkzy)j#@vUWQL1 zYk<@e?!z`@Jh_dJ-xNu7io20q_l%Jq35iK0gH1YjgNXo=rNMZeewOL}uW?CuPtrUd z%2Y(HZUAQDTy|7BOT;^S)I8K=U@tcf9ipFw&UMl9XPti6uZsSxQ1`r9PRFHRGBo_lW?{D7koTE*?BqyO%&k;&jhU|p+i zz>v688XU~AE`P`-Qf$cQ($c&mPT==A=5dH6o9^w6-}PvTjMuk46};Wr3JM?q2cMOU8`Fc}+!mW|o3^z8N( zyVjyj@&qK87x;Yx-z@mfPYX{=Kuv#(54m{1_we6}OVHH0sq^y*dmr|Zf1jp)I z8U*b>q>i&j+M2eTwwJ6X-k>xn0m^28rCl=FWt^h50pI@>Y;*&+h$BbyeQi<<5$0Ze?6H)Wzqms#S<&mFN^br_!ti5l?#lZzyvOp7CR zoJ9x|iNRU`Zo6V=mMmln+FH)m*Z18Da?XqT2QzG7mr$1-AW1C&Gt$aTRYdmJ0RN^? 
zG1D^XBnt}1y_&U1rRp4j5`Ztkk!zA=E@Mv^_419jADJ@$ghbUDU1Z@im&>H?8l9oFMhs>I4`R_Bv>qy*Li`S3H$M*s>A{l5^Ya7TO&FLxEXXi)M(*e@#C+Xg z5~0f^jkkG25!9moVict9Q>sOcD@Q%2`f0%RLC6@)AkahHdUv_Z+9wauX9}jNAQsO% zXy5_=iIDKq{Bs=?%qE@d9_r=2Ke}zy&99JpI~8FG8u+OPk=&l6zv-G1G$Lbz6AYvy z*#=-C{TO06Ki%$njN6K4X;s-T{g}=_(SY@2F(r=qb1rR59dg88QrSm3qzRW|c6v39 zP*Ge9sT{f}pGPfhhTEqBo3`KQ{j5y#7x@4j?2D@H8z!VbwxXLUT3{$21mdSr9e+8| zq#q0r6m?4vH*pb(nOoF)gj+Ehbb5;b-+Y$@Mcn*UI8jaeed0KP{&b3kzvKbtH{?}}r7$oG?9T_#aDdLO$z!7^Y; z#;1Dq0RVJd6?!j5D@haA-*&iQp~_5*APDH24UvpDmX;UY<$=DYgrgKvi|4QGs@oV%di<11072&1X{=UhRgXYkH zPF8g0!-83i_ou-T``2L33Nv-zj00;^G^jW+>VSpN4rldOCQL3777}Z!>`-fbAbND5 zok|&0mWAq_Y&T1yhs43X#dy23ZZ=2{WhbIOn;)N}l}^YJ`I$vpMe3vP6-?K9T@w_! z@3vH~liQp*(d?RSJ|jw6mNd*;9{HWyEeXxtNu7K^k9iL`u*aJ}ReUG746u5gL9i^t zZ|hy>v-TIApjV(&m~}Xj4oA}3VV?0oQZ58um>*-5N`={QPqeCPb{f@DF_Q-kG}gi% z=G?Ml?L`H-43=b2($&GGe$eI6lE_F5s}k|)5{YG}dklu)=i$5dcJGU^qp z&kpf%Bsfr+p;dgn$q}^N0BcpIMksM8V#hdOQAYjLXo|s@OTB`MD7&2b76;ks0;>rR zVd7Y7Z7%{YlBFVSCBBt0lX>74edHyuKP)01@`QpwTIMA1xU9ElB?}iQrH><%Sxrw% zshR>y2V;dYL~BR!S^e*2t?FrbB|^9!P&>_d0%1AnM`ZU5LGRNHfooIq$Xy&Q%|+dG3cK1>MZyr~U)3wo ziR)hJ;Op2~N5IG7p(4nsA(SOoYI6kxw4pop`J+zw?q9;QiO@H^J zA6^XFjQ$bDjpe~nSZ1pw&xi@6v4o}s!DvN%v)x?5g(Hxb25wb?B|gFv4Ffks8>pSx zbE8bP=36^p@ruLRe}UQ!c;LNIlnCOyd^7}vdNC|Q(o6gjX)Jg1m+`fi)6To}vcbrkCIM!?SmT29?%f@*Mmhko>y*pE!=`5{J$Z&GxDmz|S5=dAe zgq(EZ=DVs()|6etj5l)GMiLlFYbs`iYWGEZOIXnUQ7wKqsBb1e5&+IJp#k^VuWztgC+HIW9oquR zEK#+hznHOj>pbn`u{KYL#q6tWn1cR0)VY6S*1V)aspCsmY2ALe6t5&& z%JfI1SWb*I!cP=Fuw13hz{}L6P5BnO>@>m_HxgaGro)m+-GZXyF|@z7Sguo;d$f<- zKN8-|0mD;B5AzCb9-FxjlY)(wBPY=Yh43*H39TN?rs?K_VeQoEht< z?i_tF9H^`Y=(byOYUhSjh1^;Gumx!&AB?uK@JZXyRKw~DV}!S zP+r6TXzXS+xBQ%!KDJd&+p{=2q3d0di&JZV0G)KoI-@L$H})aJUJB>aMaG!#FCDjs znor|_LzJ>1)Jp@f)cbDuRq48SBGy)wGgO)^in33k47EWtY{-e( zu9bxb0y}>70CSOAh&u?StTRR^b|yWeB?LyGmu}iVS_KUieL1O|N;zSUF%RGYy@q>; zThrq&1RMu*6T7kVTb&{#8Z+8t6oiR!X5zg<(PLH&df)8-D%8C#u&J65bz) zq<`lHWj*a?RWm`SGTBq(g^`LgLE7><|H@*!x@sMS(^J6Rpct!P{X`mI8+$TA>dl<@Uc?b~Na(E$ zIcVh>s3h4`V0K}K;_BmRSWnD{2&YZ@cBi~#*T1wg@0X42_9LOE2_Szzb1Xt!LN5N@ zRZEKKIBNgWVs<0;bU(81ru+4j0d#ndw3>Y43WI=5$o}o|8q|*()ORW_lnWFHkFPI2 zW?IsW+P?-ADCT5h&d5W(kT;6eizvJJgr)6w0xHZvVgy~OG$qirDMu*HN_Pl* z-PRMWK~1!|FMMg3R{-7L2+3itBoIdmk}efADHxtyy06UPGDoDG+^fihn0e7V9nuL0 z{znb~M!ZY6UrpL--QSTAxKLbskR6N4$1-VZ=Bkr z$7kc)XMUOnTFmLU&}m)M?>OkUJvLh=wJk~F8h zZ5I#hC-lS`b^i&*T{+|Zhpho!5S}`iYb$c#+5|q55%!u~x`Iu_2kgl;%HDD(h)B_< z6sLsPA&cLG??I07(DVi76<#uDJae1bOcB+UqVfY3eYdppVb{DQeLCF;#zt(@TH6AP z5g`W=#BB+@=J2oxmLX&*>?NzfzFuCRt3|(5LaSc~Jwx1#2<+uc;M3Omd z69%R|l#X7NwZ3W~2A&OzG|`)@gdFbxHMtI)R&D`%swm>QV#%!bODTBQ$Q$uy8zu`j z(;u)1DM;TL=6c`1%glBrXmA?RQg}0djX;*LG zMj+8IM}r|4Zw=2bX&L-Wl@a_Z?Y;^lf+nC>!8Tryxl&2|Dbbf@rK5RY#nd0TLKpl? 
z?q7VVTf*~_u4cu{L?)TnGY$jx1RaSi_JsEbekpV*%*hm*ZuTRo9ND}4eRk~J%+Rrs z>tbt)#vt8ho~(gT$}FDF&d@#rS*^2Lq$TOKbw`e<Ljf@!b)1UFDD==i;_J zmkQ*WGu2R{UFu8!TyJmqcknHWd`mSF^iydpNUd;Hz)64$)$X4>>fb@~XvT2y*r<;} zc0t~qF1@1*pN7Z}=960>7CDkI$}aBI5UqZN0|IB|L5z{6hm*#uTQ8BQpgh&lz2zMZ z{;OXi*0+`>1@i98k1fbj&8G~WJtuUwE@#S6%fxirD(d}tQ^y?)iUuaauO8IcyH~}P zC6QDt{!$E9mHnTkz}1hLqKI-{zkE0k}Mku}VfA&}jhJYNPJUq(HL~NrfZg z2n;OoJtY%@G03HHepNb2=e1O4f_06ofsFXb92?p<1Ze1fBuQ4yVf?^SVV(V8Q!IDs z)mnS*QJa`#hlM?(+E4 z=+dJ?C1SpT{Gkfa!wB%)zLkOdTDq0V{g(2;;OQ4?kx46|4ComQl1!!?j&4?ZHT*#g ziztZ^h{GuoFKo{zf_BOm&?6o4&$wok0=$dsnesOe#VVw3w9FL6oJa+^KHOuz!jeSI zXv#|@uay^R%^G(0Qv0cx^eX{MCg-sIyIb-A+oT?XMC~s??x4qSjp#F<)!yc(Tv6?w zR5#J77!4zonI7yTXPUaU{qJzenRF~rz0#J2hGK(0)6y4{nRI-iLdB^K?EVA1r7Q3` zM#2mpiNAV@!p>$4X@Y>RO!E@aF>@AyZ{9Le2rTTr zYVLE94k006?EF&UyLIuf-cjLV6S@r(mG3if29qH%-3xQl)0#7g>8&`9=rTm}xLnOW z>phOCU(9tL0p7)Q{eH_^v+1Cr!wG_O*mRlRInMO|ZnD6pXbfX(* z+l;B#vhmrguE@S)&#D6$_=|T>-fbE)hbr@!fXxA{efZq0B9s~MV3<}V)n|$!R@V>B z-;pp7+++QJFw59oRt-a*)dPD&QoCvIZQ4F~{}DmcH+K{-k41zC$mdL`9&7i2;X4&Y z7~p{uc49AnE7h`ZFs%(t{nS=HZohMfp6Gn$%P1luY+2}HaL-PC^+A3Q-sGT~q_B4W z5~T+=-e^MIbV?pNFTW?C7O%YzlXj^PLPijdW{!`|Qf40Ri)+NHyAHh1Rvsl}$>dVS zmx?`QqRXw*+y;F={}H)PKAF*#5cK-0m&ld24n244u3&X!YyLFlT035#wg1i=_7%&S zkNtD}>+Z-rZ8qUL@qo1n_FuzPCj#p?VzUZZ!1RX9|FlPbc z%kvjj&Ar0l>6+eQCo-dAbt_Vy(j4e8fDBZ+Wxtia@s!NeJs`F(^ zi_ZRZ{RwKvi9WO(uwBlv>;5>el4f3-$)_f#>9ax~_DdCK{dc@wEhO3HuG`(y^SbfI z;mTG+bYDEVGJYXVlK5<#D{Jg+c=O`9qK&(L1kR!sGe0D{_vT7KnavQ|oZ#0nf}O}n zb>y`dn#`Np&|H|!+Rai|YEA^bk=R_QYspk|b7`BRJmPT$sQu~MQq^7BeERk>u65r= zMZ9$?GnZp5U0Gm`EVw(IF=ivl7jE$+>f7{Q=d*eFtX|cmwx0vg@T3l@i)$I`V;I( zBUS;$v%#Q=aFs* z$NLuQ=&l!Sl8AtO5~%ZaYmePHO}MV}qm4D1YVJ7^x}7@r`SWKaHtVy@e_t6CT$Sm5 zwLDU@z%;7s{!?m_yYR2uXeuY}duDVGjGc6))rzWGwV=m5L8hubLL0U9nRWlC52CHb zqN9iULRA-W=mwmP!(J6`g4Mt>@$j7CGkebpAyx-^pK27I332_tszj>2^uV>cA(VxH zA^g*UE1L1+`K)|Ara0vJ_L;46#!!2-BWuhoK=eMI)D#Ez#ux=sCb zv@b{hZiYNv7Y8iqPq9p)J()H=w(k;-!0+Q%HHIWq}L8=n3+r&gf|*~K|M@i= zV1+Z$HOWM``-2&zoq5Fl-&YLYz0Paq^RwrY6g-XlicA9HTq;eUdPr=^;`DX;C@PvS zSA|s980BBTaOOW6>it1TCqYGb9^Mg`8+m{mk(kD4?H_Ia2G(oan6m+@9pU_i+7jQec!zIfU z_6S(RABV;6RB?z9oOARE_Zd8-0lJwTL%7>l_X23M!O> zoHgH10Mck;t;z3A zx0mMMku`lvI0c3dHAo z;rZH%s2$EUmQY{gnA@qF=wTU=7^?8E$Xzk5&F(tYk=*}~W!8O+!#c~)=t)EBi{4Ee z=ZoPFYBH&{S{*cwMqT{_o)|5~eU$dZ5|>+=FIM@GAdclW146QP|0pl)|KS%nmff+p zpZ4P+lYmi!BLt(101(C%Tj5ZA>VAFP(r6^%1bgV4pPdJ2y1zy$#^q^LtFM^<^B{TF zpp3;E{~U}tm>FNR+@}hqoifgY({G}G)KvVD3L)4;tx_>v-Q1Ix!K)vaI#`&qf=Ik_ zirwCYVO5Vn{Od~qe?Ky;MWnIv6x9y1czvXL>+iqYtD_Z}Jz(0x#=iP@HA4?UYeW>@ z+ZLKdOjAxekEEa8d*;n5ffRL5!V{9tJn7Y3`8f|IG1d^v!4s9CV-23gqVGk%c`z7b zRLpN5{Q_}von%NQutI9vx@rZTaKY%OQ3YgnsO?9P5xzTVD^M$C738=A*ku&UUtrqs z!OpUQpy40M`#|nX!=OMujFTCqV!@buO}>(Gv0ylqn0w0Ie8E5%BJ{&vk9X(mS&U3r zjj=wTfTa{t*kY!e&|h6W7h5J<=o`yaemN;hpE@l=oYy;NlK1N!YypPA+{u~!*W0G? 
zuG$c0?XKJ0bA*sN%ZuKNV5JiqXZ(ISq5`g4GXk={9{@cXPZVnV$Z2Hf2^wQ~=R`Bj zrQd1KF3tOoCrowiPm_8G+1fnWnV}n+bO^Vu#F$(uc0a20!Z0R+NqBs0QSmWV;VxWn zW~uzp&}$zlq+Jy3kSrd?MGfgd%47u!ilFP8g+U*GE-!)OMM)( zF6FSzG{6Zt6EdKi`+dW;yfB?Ad?DjOX=yEf>^xPvF`5m?1SM~KiPU?~A{dU2_&L2# zJPC61st)-B4MI?WXw|EuF6Ivn;*#1!fllC=j?SDsiV^Z)Hq+7O5n8B27w^jxQclOl z{y0II~TYPr*N{L z`=I2nME8GE?*3=(3XF?A4uulszpX1cCRtNEa~BI@*8hIP6BOfU0PT%v{1#N-oSI$Z z`+h@RDX>6L{akWMFqmY*co@IhBO&6!RSq?}-nWm;^S!7>r^bP%Fn=4j7dLEq>`2#N ze8=h1tT*9{@k{ZgBW+YzGSd{{d+pj*g|8h&b_4}f`DoZyq|4kE|A(z_jLj|D)~;>a zwy|q#*S2lDZ*AM=u5H`xu5H_HzkR-w`{O3}-^vk3M$A=evsn1w*_0OafoMEva0tX$Ex+oiF$ z3*pKs_|iLqQ_3Cy{pE2vn5x*n%TXQ$Zrbzsnb6=WOSOVGoO87eEz?lG;>(SEN{R)Q zk124S%FcAcYg#n=%JH`H@HtUZvJPzpn&|0na5&$fjVhkG!_e5Rxx+xgKxP_JuEkTg z0{Pnpl0-$W2Y_Q$CT`66$9jiajOH9ku)^!7lcwvc+$|5)&;UxopbaPqs@U7o89W^m z(XRW2LKjSX?0*!BHYKESm~#+aKH?<^DGI-QtO2qgsO~a|_nXOtbE+SOCbYjVQp9+U z-w`WaTf;5oA7H;JEXL&fTl;+m-x|uiK5Jxne!bSX^}PfD@E3ojadNi4bbd{~2yEJ)0KB~)^0vM! zH#!Ls-D_c=8GT!S9=;hrYV}d&Cfqx}TEjSB&e&7w)FU|y=?v#5xPQZVu+|;)FfKHj zTmGP&>VCoxTtEK)%IaQTDlIfUn9y#+NhR8^vM9o}U-P)*l=!1kRV6 zI_wrr;e;Ezot+vgLUHpen#I=Z?zGD|6dW84y zfdB0IC1dK{bN$8gjAjwXFN&ET)finJ;X0g;mk=R<_hkE@!X^1-TlXA>vt&>BGAY%CGZ(KaQ(4bOI-?jRLpK#+;_KW%dGWj zbSZ-Z!V=vo)PRCCm_q32Ily_MA_D`6CXxLf8Kp`M{$hGemICc0dqjiI&A1VHdOSxa+) zM5U76g{JvR1=L{bs~nM^Qi8CB=@?|;RF8w9T2UBCLdK0aT1dmihP9$65ry03^VxwT zX6#LU@X!-ZidzHX#$?g(HLVY1`QVPf@hY4*yqq-$1=J=1bgs*c4B^H`f+QW9trE5K zd-WStkrn}!sdKW?b%m#%;&&4ZH2W8j%jHV+Rgfn-2}kyt$odFXv~W3NVeKg>1sNErfa2QK};!Sg=bUz};aL9ewxC&Bq(6*76DrKKB-Xq5*stM8i z=E#%4*d}5q7D-k{g(WGZ&j{;mAXFHQHS2=P@s$MuXOf8}Ah>_F2h!^4BxRtfMI(w~ zLoDD)O0h4^C`eXQu3o^Pu)%Dl)Ul#~iLCl7iYJ#fn2Ce(F}Api%gWowu$y;-kLbqM zr~`r2q+mIu3?hEjO;eAiRbi0A7^e*QkhKX%kWEkm`%V_!oMXzMS_i%02e6=-{LI3Oe7b<^Vi}3F5o>m4%gz%MRNaF@fLp~l8flcpkC4?7aS{g06k9V zR1264URf(tpW|M=!^^WF60durbkJ=HpXrZ{3qX z0P}G#YLw&kxJR&N$~kS^LI65!Xs`q)jgU_~vbJdj)wJDVn$p2I zBG;h}3yqmmnN@R|WcU$%M1dW=>$bVts>8>qW{LK$AK07H=vTKeLJp60i29lEdp(N;mn;GM^^1ZL| zQ?A()VqiEi;-IJB9rG~mqq=S0KNQI~8>5{1zL=(+R7zr*=UWHBnFSLICgy)d&&W3T z)H?M8x8aR8h5b)ncyZcn$fB5yxX=DL*nOexNXC8D#Go%;xy+#h8XIbP1cFp5pMjVm5 z2rnj>=LghqcBDo>?k_a!Y8<*HTQQYfJIx4Ca@~n5$JY%H7R0Mne+^W(I9Sc>d~*i=h$o14JsTZ8`#G8nFaJ1JVu39f>MLbtyVZlB2xhV;%oDc z01BDpq%dq7(lG~1@m@l!Z4PO-la3wz9N&pQiPIEKa6W@1x0oAAsG!y7BV{AZIdH56 z!?Tz~;{C8M{b(N@F5kJFPizRkTY2rd>wJ}elGu)cZvz=|>Kl7Xbuq_nmC>gcivlVaZvCn0c)2g2I zM(LE~j|1RejLuwIBm{1C11hifv}2A)Dw+m9MU7eZ+G`&=SmfkcBWpJYP z1=;nDbLVMThrv8=dOTkjHs~#)MN&#wH~3_#5I<9J=lZp`;a8h&TQLBWF@Lq>mHa*} ze{Nd^?5k5nY3PraO0|>4_yD6Ayvv9UBzYv8g%Jn4lq+2%0^B9b36V+rtz%dhVDP8{ zQ7_eSsE}%u;hVAx1wZ`W*AG1firo6oPo{fS8xchqdcc$xvb({976ZRZqr^2%v;PUyzD2N=C0;;vZcI$iEtbTOV(VF~ScA1!WDIRQ^rBK=< z;Ij6jf%aFJ;Cp+1!163EX5c((^%AM1&-nAL>Y>L(ZS@@y6`ubovZ$%GfK6Rd=0Lg1 z7;}U)QAxDkzoTg)|Iz+s zv%oq1H3#meOR=g^Y$;O1D37HEbW0=YqPehD#ZU4(CJgxYKbo3x>d5cxhM#^z)Yn_! 
zRZ?YPZfUV9OO^htCVF*~nKo*6kzG4FqVoP`8GI`pQz-$=!?9R2Q6X$v@wWQ#9j-Kh zvCp_M0_9n+s{0*eqIBEO3J&&Xy}fgY=oTnBoAyLttb#`OOvpzya}N?3`}QlA7@SU0 zo+dRx;}Qbnk9au{Qun@lC5$uuQ7yMuNDd+(15V%m{jBFefOJx~$oJw2O{6fQt#BN& zMM~BNs!Rir=t*T&`D;ROTMLK$95POupRfdOc^~){{n+2O#Fl;=61wL}8eX6I&e;dn zJ_+_4t7Fh&;y6GZgrcd`A9}MTz+rj4A3BMszm9h#n@-FVFnG1M$TKP{UsVea8&2WV zE1R8NsZ)XhDoqr^WP;y^O*vrz$=&}*G$)F*dWr#{j4rkIA4wK6_Lh!qwP6_I6%(V- z?hZT+C!P6sh4}<7r*|AmbJ7DgUO9bzYb^^=pkCEv;&Q?DMsp5J^ParUOrww#sx?m} zTWWbWXY2Q_iS2KCM1-Vmd@hgD(&L-?0MV}u5b>4(D6!H)@She^02^X0XTWZ30byRI zb|L`Ye>0Ksv+ZgZhzb>Qq=BA0LOP9zBP7r!Bn1ib9&dG@3sZS@%0v|dw6JEC%F5}{ z+1#h1-xwSY8J{b3@m?jZtvv}$aLvirO#hMywb&esz_#17DS;Zu_GfY}6luszu%^39 z{{RlezXv)kw-NEThQeqvBeoNV2RNW6&(5-7&g|+T(JJhAgfr{0Jg}x17~(cP{4F1p zhzxhLQfJg(0S-nrF}%$C6?6GVZQ}~@L}WpG3ZYT7cCLoCAaoUE-XpwJV9*m!V2jlR*bLCAKb#s~Q5#CGdef4r2_41q%zb8X0M*d0*6({Wh_P*2EmV}nU zad^@UXN>sSM_cLJ>@H1D8D2h(3V;x@Z|%5is1u!ws8UaZe}_mH=h11+oV+F2yuxQf z;=iZ?OV+~sdnrAZZ{46h60GK?ZZK7D0Cis)gczTWqR2#TqE{{$api8Ln!f?=#yf?u z5I;*)P;iwjQEj$Ba?pr|$$kJ_{@J8z+`%or$(3~ShcUf)uxm9#XJpZ!Zus3Q3<+bA zpik=sTvFnpDS`y~8-vKt)D*dwO57%qH#2g}e3RX_8sA+s6r);6tX3W>+o>eL`o~(X z1Z=J{bxY1G91<^!+BBJEN|XR0;)j97th7X-*`%MVcR4eqn1qo-Z9`RJw=XjzN-CSF zDGG_xP7OEHme6l*rYjH>THJ08pD|(TVN1Q~CrhATd%cDV- zwkCmypmCDi0TamN0Sk#bu7Hv}pe5epkfT7UdM zZBDhO_J8EXezUdd67ak8kth{1QM_D|2NV~>AkDCJyHB8eBC}NUl8uEUQtxv++c&og zoa)aQLi(ZV31KJqIdRz*&H7@Mm7EETJ6k<(XF+3X!oeX*cjwQ@OH@-SIwjJf^;2rC zolCm6b=yOl2FqEZBmK&jJv9~W2Q4ihU)*k&|mWkzC-LQ!S?D?ygi7a!$R$zsnz8?O(m-#R*O zki92T?m3nhZ)e?Vu^rZv_If1TTV|+~E{{IV{h>;wU0X-6C{olAdxUAJBC6p+fb%2M zLWOb{r#dm&@pu}6Dyo#mLKWoG%#GuX3Yu2a(dx41#DJO^3LSeDlEAngz~sW6W-|tG zDw5D3X%Xh%6J9;4>8n0ys1X!h3r>3zckg~&nyI`7Saz&guZ zgmY~jYFJZ@A4WAvzf~9|Npo--Kvx=2PllGdg znV^ns$Dg8_sy(Zqa;x6d)Gku`hFxRXjX{1IXyezHDTQ90ng_17?j9dVWlEazEID^m zW&OG_nmq4b7}Z~bebd_BR#wJR*lE{5GNmYnnee-dO;Hz#%aM+nt-HND0CC1@YFIoI zjU*(x@odXeuxRS?SRR0^coQo4J&@>AxG&c6M62?wY zMilU$rmyTu=XO>(B4%vJ4&p%GzIo|58LjXGIy4qut6smv=OAah(gpB{$P2VhMA{-pDY^U$xC(l53bn1RXRq~Be_NCH=Rh$ zA&M{vt`ex*zk%o~iA$n;7R(~~rrB_?6YO#K5aR!PyHr~Jkw2#b0M9N#$e1l0z zf7yUoN<0RfrSdk9Xq}o%(avuQyAv|<>wdcZJR^hcFP+luGtERsh)9upF$gZ%twB}5 zELil_Fapjiw)z>}`BkG}C#IOoA`}Jw&SH)#@W(9egr3V#q9}qSAvQFJE*>i`%piap z!=jrBPq}hRHHV}FcCP{#H9h*_4GRlj;paOIfTP*BbpWZCiQIIEeNh9g| zEu!bBAn0Q|wAOhbLXQWxkK>$s#a8Gj7TBf^>za=G}~@?%BJ15oF%&`e;U9!@Mx zyhVAQb7iQ4grLnwxix!#Pe6=Lz{wl_#@$TpkS3%rbK0t2Bg%CzxnJK!fUqO};8MP@ zx=6_77r}tI~fFZ*qCyJ$NTaJ=H#dz^FOGQI{7#4I4zOr=&qw^odh zyk*7sSIQ^VHUY&5x}-3|h1{06|Aq|gqPb>sILetqSGmECQBt@|J*kJhO;s%z+!5Y9 zV55oQu7jAPXU)x6P8EYF?t3os2$pESLEz{CPHU*RzN4t!ur^O?IM;>~&pt_PnRh;L zu|m@8^}J_1r23_a(}5GsAH~~wPunkSeiN<{K>7V{EqJ^OPYeV6b|rJe!RA9!$e%U< zId)54jP@7q1aNiJBiAiAOn!kw4JqD9L^E@d(U7Fwp^P_ZOu{lf?9<$*b5a2|wku2m zPZ!6C32bOQ2us_rYJ5568^f_Xi%ab?7PGaY4vVn`dv3wOKv=JyO9p%vXL~%A=dP^t zjbo2rjh!F!oq1CJw8YoYJG;A}>22%)x6VWgR3e2BAs)b6gMF6s)r%#@U5SK(Q(3^< zfigeV7QAB%>~-Ua{G&Up%U1Ua8lk+R*G=z5#>mzOU{fdRksoh?!G?a}TPdFCpe!&1 zXi48b*uV4hy=mkh_5PZ8jQTHMdkJ5fUsfZ{bO?$G^ zvHy!m950My`rYByp&# zMV6--f_qy(@`o$yA8o|yK?-$ZdWNYS`^qu!095@DZ{<0oKZjp+gquy#QX!4OzQEew z+DvV8T(d7I_lRGQ6duQeo9P6=nr!$ZjAlMH##TK$hG`RjhE#Np;r{LC(la}pSj*vu zhG83Cfy7Cv_NK6$$wrE2Se6xG-RbBT8ad`UZuYI4L4lvuLG6D17hbc9J9TE~!gq(= z=TEyouFb~%;+@Pq09v9WUJervI=g@h4NF@g00fBl)8=5mqG$!vA1(oakx|_5y>Qmh zz(1iyX!nt*FL(YO*`K!q#~&^?p6v;J1b8wTDWNb2!j>Iw<{jIRx^WTx?Y23hzT*i~ zq&=X?i1_*EJ6~##aF&$iGXO*IOmafl#lR~)1`{nEqri|$*=A%?jpR7vc35G=m6GrW zgs9daRb2iJKCSVGr$rX#0G~IlqPB) z%Sd;~KXSs8CwMOJK$70lBEHQ5d8@IwN$w3H;d7D_)7u|arg>XT-lg1nLMPFs3y3Lu z%=R^rSG$|uuTj?IFaajQDJA#CRyF)e^~&QM~V`jTi`zF+6S6 
z50;_%Q;7H14sBFVNXC1=o`}nrtHEja8;xsb425^!9>H`!L2X-Z2-!alFlv^ zXliIhQW=e_5st7y{%z8ccw^<*FrE#xZ%@$adL^s7-vF-+U)qaUGB$&$%FE}gr^`-59(RQ&xO=x19txVfw2c?_ zKMA!H$+~5T%@1qVhcm_7TfGE}*%F4y;Xj<^_^o8nNkb&_v(>zWJKtVEz`00b-*Saz z;!}*cTC0<(5M`yRO-qCkRQGwY4Q2Bnr4;-LM}t1LCtr#glA(qhaDXsUew^e?PzznE zwD_7%@HlEE0FQsMhRK*eM9TGLV#u_F?w?pSGGBgiI$M0zl}gr_TZZ~w{`ecM?xm-I zxSDY^MCYSaGXilNb((kxGX&oSIQYdfJ1pf@cQ#SXiW7)DUup<~yfD$E( z46yLnfJJ1R*8fRqIsp;sxbvGot=HAT-{qan+IHI2x6+#XL7BneInRoflykO;qs4#8 zqQ1TqR7KaWHsPS;sC_A)@Eqc~1WzDpXYXhuMKZ`~~bug|~&~kK%T%^0@EV z2WAfj;R@GCkK-(k7<0kIDpUUo8Xtt4Fk4l#8m_#$nyFqti=9e8J}u}1Ze9%Q6E327 z6kNCsekCcWgQaT>BI5{JO@I=R397!1zzdLEDOlgF=inU3(iua}EykCmoal7%!_586&U`)LfXVfD=y*!(!Dkq7s|RFz3#nO2Z?m3WAR_51(90fRuz$bn_X zEx_1sj#>?woI{~0y-7u;E2}nFg~<6fiExiG^}x#PhXHJ;9XdgXeal!+=ki89FddyK zAya}Q#k@di~Hh6pq75cvtFZ)b&vxk_B`V}wNs_BBssB5_cn4uEAB zG@0YPCn}!7iIah?GHW#EiJ??^%`}wIFqUaKn+MOF7gp$<>&5**CK0t^bu%9ipLR6` z*$>yMb^=7SjRPR;U^2pD+b?5>Xvu3eSMYtcU26^zp8P5Ekka(UfGu;}GV(OjO$N9R zDk^YwYf`H{)9>ax`(sIBKE2r}oy(@M7(?}&urhHbP%jj5YJS>?E!cUQ`5LU{9;XP9 z>svj;pNZsgCj*po+Nk11HKl`FzHnADzpU=Jd?HLkI~p}pWC z2?CZ(MZzAXJL7N>$d>A~PDlM`&!9Rl^kxLD9mG~n;f$HVr@ZMF)m}mVprdAMn=h15 z*G~UVYn?S~v68|lAib9#*@n{8H zlf|a-lSj+2VDZi>B6jrS}z2ZLnIgHxV4R&OMSODO9 zfXaMA<&k{yyo)xyn#qFfN(`*3gQ2~7gmkh~V9}%ccYz46q1~si6HAv0r+;FR(BKlU zS>drM!;^dBfs%@dK)hdD{!*Rb(l!X-n}hF28)@R&z>SuawUwm(`&a5c{;GEr4`fC- z8ah{3A>F1~DXxYatc4{z4k1B13<=Qlksu_miIC=M8>t#o(f3*&BEHEpZoY5G;B2FV z-$23H%1@Cm$~Pv1-Ip^oWm@(6%Y)vG;24#GT6s?nE0U+Q!Jo0CVLY8ec{=~dbYBpL zEu*fbOkmJ7>tu>y=sF!63>~PRnRpXmVF_}@>91~z$n{XG<~Tw*R|hAZ&BVQN`%=|-_mG;{?@{%>b#F_8Q1cX$&So65{r>*YV zvuLz-v6n+-TPQ~@z&ad@HUhvFDf<}VlWT4uo*4iPUYVb!{Ee~3`)oM5nQM_IOIX$3 zF*oPi%%D>lT3Zeq@CzHY^X#;scCH)ykYaFp0EF%P9{!4s!9}L;AXXU=reZf-{98wZ ztu}SxTiAm2@uWbX{%Mz4UH}hX9i{Un)ov^ddwo0m;5T|Ltt=7+|YaS@oYL379F#kp>NflRXW% z=^xIOqNVA)!G-b<=TbO|SWjUZ8a8gZH!x4CC^mEdLs7H-XX_G_ex4aMVln>j>i5e6 zWS@+}CGXI{f20#WTNV_0KAJ;O`g+6HFYm`o@;9a(@k)owe}<>Gg#Y};s=$n37tU~T zY}EZ^eD^qyootqqZL&PCSx>c91@r^sZ{-61Y&q&K@%rlgVe|S$w{olO#;dldZO3NU zMIS4>QEkA~IL~P7*w*{~be?pe&n1`pTx0n%`LpBmdDcBWa-uV-qLDRNi#$T+Gt+8c z^X)dhxS(xGS{z7=icX+r$}*9Zj$798exTyiN&AAnU4uH=tmycdckVIEn$&3AOFmD zx%=yfR6Ge!?P+A?Wuoh;vSdz&Qnv^A$~>wyBJEo(#c8WP-`8Mg$MOdUZELTci?5F8 zQ2{V?>uSqLAMvEOgI18eq`l&e(s-zno0##T>jYZ3z_83NXrM%(S!UDihW|2W!QW4g z5H*c@3crNMQwT7@oAqh1r(mi4>iL0UZrXkAJ}PH}Z!o;^X3j$611cGTvFZTSbZn*2 zt=Ow}yRwj94IsIY$4``7bS9RKtrK={Ds3V}jZmGulMBl8^m~`QUViPBe-m%HA_Lc`oI) z-Zic!?YeE{Oz7Jfp=~*Csz7!JC$O_QJ_c)iM6=}@%IyTHCNeWH9D@P3qV-)YYG{SE z(RhEo{|q={RSIh2+v4iY-#rfU(a~F9N7Fq(Yl2S|0l`klE~XA6Xm?<;x11&Z-aX~$ z%dsEvErl4Aasd&9pJ~+I7sw_vODX;@)H2${4suum^wdkd=0B8q;KEb1n0VTljlABgsbS7OSpz%@k9~~ zkjtm1{&8_Z$&5pCJLasSkKbpt)1%5Fe!8YcrdH}JP9w=~+Hm##Zy zu^`)^b=K@H(+{G#%jup`(aMhr*tYZ8z9z5Y2jE5%2V&!2{jsNuQ7A;BB$@q~o9{l8 z`!*xiv%^FkI1!9qO^i2;To_5Er#MkBO>!Iw!W!c zZ`OE){&hoIdnE?g^9WE6$=SiW`CJu7f>`ng4s!m*IqGN6KJo|@a7B_0dR!mx`Rv4< zsY8|F!$~?7EW6l;T-^NCV^uXL^-d?lR|n?E50K=M=9U85OGY(8kSfa@23sxi_#_qr zYAEvhg^A8_I5`Jo5lAG+2#)Pr{*Tbare!`F2fDz6)iMs~k1uqmy{yyT<0VLlkx>-% z(5W}b>&6DDRHi_!Qrj1b< z2O{9r>)KDw*0Zsjs?_B&w!j>r5!8f|;r%k_Rrv+qU7iJ`*Z$G7k@Ar!^7>vtlIKsv z8WiNL z<#n+=y?Nn_xjyWs&CCP=r!mYdn1wA?(W@V=q$OXJ%r7uJ?K>VL;u!3(JBCM9of5jD z!xF_8>?cn0#~mrKEoQtf!72SxniEvsa=_QZ2%;Y#nhxXWy_yCstiFtMo#^|r1U_84#p1d|%<8U64>71n5#&#{(d5=b+2pbq3j<*cy; zi#yh`@1Y>@62a6zhk&Od2xf@Ve-hj=x(goQw?8G4&hisJZ7CT=wyZTFvZ zqmyMBD`knkT*~0gZ}%piVLqO@@q}T;W2&Dv4}7B0Jxb z>@5>fNLwqqc>~m`JwZdnAPyboj^d%e8U4&$t zzUG%54otg$^7tF2GE)ugqM=c4N|zP``cBL!PTP#Yjwu7Qk^A=fiL5Faz(gerZ2$Da 
z5&~s=0m;26v`pyZB%X5A$QG{bPF2l?QtJen^ZUJdqGSyzj<=jW$Qo)m{@n0j*U_qn+2OlR2F1#=+@9$w--RzdhKkNE3rI7X^sck6+HPMdW=V?kMN`}ZP*1pGgy)Am zbW#!ded)Wnk?X{cyLQ-9ky6QfaAgI~;I)rter-PW)eVc&|-4oozUk;v=hE8z` zHMQd8#`yjjMM0=vB9VEA>M-pL6tJnr)xX4TuxEh3(V)Xb4E5#Q5;R3 z^Un?%MjyxlZClD%L$v!0CE1Og-WNEhgC6LAnq3fP<}~0dU{nw;_WzD`mo#?lH#m^| zX7vlwfLRP0NyHSDLixxBwTd;E;qLrFYzZ~8YL>e4+lTlZH!0Alavz5j`i$YN*wts>U5Ka7q!(a zxUc9c=Tg~YZ!A<)ia(A)u4aF_+AgO~aW_HfUiMA13B_f`(#NR)5R9AWfP@*QLT*&* z8@6o;DK<072C}nh75#2$oMJk8(rW~9eXW?1N%{ z;Ew0Gx3x4@7Zw;UjRU$sG)a{qk3_Jj>9tNo#k$})Bzue#O@|W|&f79H<#xD<-S^8I z)JBmk5Vvx$Qvpi=%ZyU$fTOqs2X~Xt;&DLUYXK`zZiEi_KN0q9n>cnJ8Mmk+-<9?V z3U=PBo0ZBj_W{$xsPU`D#dS?2QMVJ+pGhQYtgviFUNdT&v7GN=lhkms&lsG}r5N_b zQI*Z#w9<%ZpT9Eh$YclfEy`)Frp-3^v*p}ZetLR?7(gun8?QNDDMXD7aN zU9&H`CJyH|yB?5Sb?19Azk1?WdEtl+od!%U?_7F9eaER_o<_&vkVXrTgiyyZo0Ax~ z;vJBOy&nev3G>S@I7Mm2XMz4C+C&ISz10=n){z(miub&qZPwi>A0f<8TR`=8X}sWc z6Jd4hz~n#y2En)4KOrQvk`Win7MY3*q*|ya8pR;5 z$I>Tyav)4;H13luZ~Nv&a70%`Q+;?4M-&!J&M6;u<-_-1Pz%7s)$x(_;QhZUEv-67 zzt1m{2~qZ-l)JwB-|PVn-#>x>GHI6E8naLVa`d@~_pd{1rea2lTp=er5} zXIAfC?`~cy!oJ${@xppAS_i5{trwF5j5Wx%%JSfS36q&%1gRVB5r_)QAcAF%1bR0> z^EClNIe-(;Ak7~)5$#tu@8pBd7rb(cO}S_4Ei;q{2lew7yRLZY?&3=P zJ5xjZkro(WbfC)#%z1k&sK?(!`?oOAb$a#{%QgnE$>hby2n0O7rXfKwJKA>u{(Y1S zd9-beOq=GI8oIU0>L?QFD67r~>l*+#_V^;cEbu)n_1VAt5MF-<2%<|qHxh!5su|(# ztyDyB*u}QfxvEgI$0#_cTcc#NAc4wa!wu|$%77y*x;rMX0_lSZ>qUlookY*mnAaV% zU08fXpEZHcSm#uka#0U`o*#aI7CPwJLl4a|t$Iu07r$D2Zw2-j5vXym3kOCQUJbdB zIH5pU21g5h*HXdt2tO~)OLhoY8p!ef!1fsVa} zio&9m5}g)Z0V%p(^0q6fOgvt?WL%5`NljsArxA2J12)XKH|CF-Q)32#xBBnq1fc! zzvKLve$5Rw@Gru4wqQ(3jZa6PH}UN?H~d}MM@YbsAS==tBSP-#-cx4CT-a6KhHwO- z&A+uu2BtvAiL=EtNC7OnB$DS=Zcp*l=H4P71hEJJto>yD|DmKb!*O7A5Vn86*ilgG zq%A*Wz<4PzA(^3jeE=sE(oq8<7+Y8aR@@l$c>U!+3u9My!s)YwLqVozCaea{%bNF& zj`K>Rj<-4~PJte2^&fT4+MbVqO867H6)hV(J5C)93Y#8gE#`uT>1n$M>wE2q#;6_p z?_b?e_NM*X*_zPu*)H=sHmaGFVwwaujH0P{fS2JpH?~y*h889jSHJKwDNUW*;)08p z$7GW@-)3+4G#1(Cr_rdleh2l0S3|(1Ww~8y+-@OZWI;zQj*qS2QSXP)M#;>X*(AF1 zMEvrKeeI9a+X-zVT(xpyiFnL$eN^?7jZ+ZpI2{i=YF^EhXgi8yzj;!OhcXv`57SS>;ku&s=!1pdJ}BDK9KpiQv2F+_nhOXf7Ls zku@*E>eU-zZu$weN4M*9OL@lEjC}zrGZGsB{-F3V>^w)GHtQ{|HoGtV-apDzfTE(4 zDwGTZ@$)&(kNxd#>Jd)X3^cx&c+!3#;Lw)AY57*>?)E8SL2$1k+LhX$t4~g&z?ls6 z1=y!HGxw_7-+0i%mGjQbB*tmnQd49JwH2KpZJF3_`z)sW+lC`surWGRh~M5=#o*iGI+cn-4kD zEOF^?C+$mb$c$rB6Dv57i6J8AgD0}n&u!gS0)&@MdBEv9aBNBxt3y;k>@j_2sVlM% ze#40g@es8i)KoYj%v?oqKT;#1Iuv7>@h?W};XGfcQ^Y*15AH zf_H87q`4a$sIZlpClRai&xT#M>Rhm;`=C*$Vwbj|xEP$|#nY+@cGBi-u#_5C?J zJny1^JE~>YguDtT!xc_ka#hD4>Pk+3Pe@@$AY0S90^e=gqza z_t=w%Ci;%IxIoY#XD9?RCjh+X(gNh!f1c#Ec41dJJT6?@E1fWi012zaJe)+REm(;2 z029L7_~y|s$=^i6FzqB1jWoA;u&w;x3&?i!@^=_%S9giVcHv?GVvSsRKZh9LX-JOm z=d>eTxI7)Egd*2Ha2uTt)9~I4mQByx zQ^9G(K*ZhS{%IL(wc+&vc9!I{sz%%U%Fg%OwJzI{^vIjf94|fJWt5XibSrB}G+nx> z%!n@r@8qM4TGaKRW8ue;(N{y%YcokDQmWT6YUi|p9+M&#;Fy%D;2iCSsy$itCc}|i z=_+H&_`c%3q4I0at^zz*!l4#qDul1JEjEZhRp~1TRP2YsV1Si06~tIM7sOO#fn6B~ z3j+@l6no;#`>Y5;A4Yg=L8lf}iwTpc1L>}jsvTIcAzArS70DAVDbeKbsx*?e^PEMh z@E@Xna>vRN0Epd7p7D~hU)Y|!s}LsGdcZkZ{GdoQHy&((Uw(ZDaR<=;l_8dON;ti? 
zW%|%>nhdp`uC~biK)d1|khDrDhSZ~d+&z*CgTY6w6x)N4UJ%S_uH+|RCQfraaUASA z)ASrJ)U)!sLtCEN+g)Ncj_VyoA&+ZK1J%L~b{n#5fVtG{=3OHyZTQ)_DwK<^L!c&0 z7*%~KSij2ZU_7!k(z{S*EtOEX68)E zd&Oa(0B|580{mFw&=W1M;isdY(WpUstqBf^s<`861+Q_X2r2qi;rG*zRrbElJrVtf zjvdtLDDR3^(I?~w$U91v6fin9KGws8#FEo77)Djh6n`}MU%(3Cu;hcikUMitQ$5$K zDxx=3<6SFrt%UTutF}};IML5^INn9fk`CO80dR?$F8YmT@R>3U`eg6*q%(=!JLw9{_ql zg}(s$+@r4p{v2~z%To13@e~|ZWY$vq{u|J^%ablI4F3JDz>Y`^C8%g+F22zjd5N=+ zm8e?gl_&atz=eNyi)GV(@5-ZxXR=qIt-=+v#9z5*o(^IuW{9mKh@(6u%^3Vfjo-k2 zDK+wxSZ>t<>QcVik@cL+oVx!XBh|l|>lB`vFzD32|7X&0g{d}j$VOI>SP+W^*^5rI z2nln+x8Bi=W!8pjy$r@tXtoL}QnP}g+V|0&cg2!_3L?cHbNf$-6=VhM#=4Isc0sHp z7m@EyFt`q;Tui&ViW|x<3!queRg97>6?ss|!<5U48g3UILCJLj3&%P)O0mq?h)b?M z$$+gV3r-y~sV%c%AENY)U)Kx~f&dbbJPrl$z^J!FWk4dN$F6?o76vQQzlqCw;!+1Y%8DB;Tci_v02O?h@O=+M6TyAl+pF_&k|*t4W7{I~NI^!XBB^lcxgjk_K{&5`IigoK47s-Hke zL70_KAta%`w9iKfKWynN`QCpos;hp|pAZvimhtsR-KMI+;%@cH)p-K8NSdjk7IjNF zbXGm(3x-dGt{G47%F>kC$6W!Y5i<|pwWKe8xuM3^vID&A&V?3re)NnvMlmhb^Ly_T ziz~*S;EJIj)CMDp1{6|{IOl=P_cyQp2UCS2rIP`)69F=pk;ViS0Xdi9I|M0{mxxh+ z6>P>R$!NxAtNR%fnh2WBw4_95X13bQ{+8TV^?V_ONb<783z^vcoTo`!7q;Y*H zM5d}|$b^hB%AA>jXcbkA6QRXGG1_i_^EN2gT%BVJ7sNoR&+hB`_`sF6TMw9p~C z@vu_DGc_B~UB?7NuoUZqwT=jspg)TW9ETkPIu!R4Wil$!U3L=Nh(tj$kzHU`;vxm* zAw&x>B4gtJCugEu3`tG)+aydxTABe2^;aaw-7!eoS8a@;$N=B+*-SCdXNrbsXZo^C z5gtb7!b~yWDpQ^mxKM^rl?B}L{}J4-&fu0Qb~%$higbVcA&2@2E=ZcnNSQs+WW=yO z^4k~}Np$b5_c0cL9;14&7BNquYdeDLm=NG0c@NSbquv&23OCuf%;kuEu9J1OM|ZU~ zxDp+RXPpBDKG;GmtmmuJk!T9|I^p?Uee?4VZ$8{EU%!c30^l*?(F`^8( z{r*FYof3b9vEDTs+pO#2;GlvAREM^x_RTAN^U=sWP0I!6P(*QH0`XdhB09#4NJYS3 zEPY@El&nwNSM!CG5*)jO@|hgKeaF_u>Ve{fasFg8$9gaY*@%ZET#5+FMp-);l8TrC zhH%5DFyrS_|Hn8Jt$Pv`{aY$%s_qk!dl5XdfT|Rg4?gz@v9BRJSM>lIh;k@Beyx z8`TRiO!a@^a1qbCF?Xp*$|7^sB%uV9KB~q_T+ptI=F~o2cy<+3YLjdVTmbCGV@jkj zC-8q!@3UnSO)w65sV>w5uYP3)ER1wCBYB-}UK#oOjm#5;=hgELIa_+zZ(n&9M!RwU zS3=@?miV-X(YFl_B!?yxR-;@aoj z+BV^pyte+G0yuq?dQVT zj_kXmLtX6h`oW(BI;_D`CH70cJCuJGQI$DV`Kb&X>SBttJ|ASrhtuYPtQ1W`egGj> z>W;4mm(&hVr45LO-9{NRwsB|ukloEEa9QCmIOnd-eG;WzQl`!&d(bd1{e6I%yAIfh zISa-X&DnIGG~=0_u(U5p>pV!l^C{(T?3V)wY|7W$n}U8uv&3wMZtgmP>r2e zf8O=e&mDCuFmk$dM0Gv0$zlV3=@ zbmwh=C z&f9S8XD+yaeXuDuwhr&tfo1Df!XR~>H!Y@4L`}?(#^Q#pdGgT)|KklTstA9qI!1X9 zaxKDw4$ps?D>yEX)}N!p@VI|ofQySYs_*VLA*G78+xFoUrTZ%C-mHIczjxw-N+Q8N z0k&?zz=-z0%$-vk-&lC)Z`{|12-pn{9Q_cmvP~PqH+1LIhpqdn1n;6JhzDE>^@7T= z9)H<~p6la>E^o&TeApD>7&19b&3c&3+OPRQVDqMMpn$Vcf~%0=8v7dljMID4!9zYb zhrGsbF+yipkg)d_UtoXlRUD9%?`+qq7Q$r7Xa5BbNZo+1yHbJ&eN6Ef5Fi5r@Yb0M zL?O^&Yl8>gQha?s;djW}c=FhK|0gI9#y~d)%>et78ce!s*DV>jOJ9m#JY1=uVet^Q zwpfhU;jt@0)SQC^mlOuW{$HABE1x*^&fyS+60q!qrXr{&TJV2~Ren^FTtax)=%g*a zDEsh8s-EZ<45oqX>DG>OXqpatJF=`T;wLQAh$1}nGZ#vCZ4r<9cs5b-3M#GcjbDm=J6-KlR#QH-~C( z8?1rL!KdSKKyd;yy*X7Kj|@>+`p7%ij`P2-=~?C!=HAnP0xnTti$_>92kR?;fLkyA zdJkX)a%BK4FWS);yBQP|lHxESFsVFS#z!=7Rxkbo?&|`F6O#e769F=lI+0HSl#_~) zAb*PzEhlOB5Rf5f9v|O~W@rQ+00QqP=*0MSg2jjc1~V7BlmN^yL_Pt_Y;tppz)I|W z00?@)418-WGx0|PAEFsZ!KcZ;Sf8SX1SQN6ocSlHF-Qi)hKSUi34a0U9}w`^_;)n9xMm0hFkmhN(HaCo49ox% zw?{Mp(F)v5*ZX`~CENVWp)Q?p0!<=YhE%46wvX)(4kl$FrjP zaad@?to>Vwu*g;6YSJ-iMEWepWnz4zSZq7tk<)F#%edN>4X$GY=JkaJrz%Ai@M4%G z-Ta7Z=1Pe)`J(h$%)@tz=l#BtlYehXwNW5S*ZU-o%O57$ZkE|XQ?zuNIEQAW-m2xu zT)tAQ+8ojRi$m;=T+r^K^WVN(|6RR`d_6q9o_siFbAnNG^dFZqdNCvWx7Q=~OQZLA z9nbW9Wqh01$nm%+Qk93*My2qf5LVmX)?=LR`A+BZN=`Hr%auZFZR%B8Wq)(7j5-x} z(c5}XwLPwHXw|n*M%fI@|Dw0`dt{D;Wvoq|Y*35eYf365l&`@+m-3Q-74fh5%I4p# z_br9d@*W->F3&FqpAdRUa(4^(Xy>WbXd90so<(gZ(Qb`HM?1Bv@;(tdCd)+C_HE90 zvM1)FSAAnTr$hYt0q{V!_kSWV8s>=nPsc(&5{0?Z^+cr$0>71}+O*f-^=iV3uj74M zy|mX_4-c(}D@r-%D?NMUrJQc!N5$iwbM@KO_ulIyb!;78=grIco~oPwLtwf|YYWwI z|LT_83Pwt{ 
z#s(WU0rWA^78@H1fs6#&;-d`%yU?dIZIUPxLjds`($@xa+bIHrxO9}NF!5TD&j=GM zj*O17imcH&79|n+02DIM1r9+laEZjgo&oYtH2Dwd%g&SGG!p_hGneo)0u%x=HJ5Ro z0x6S=l@EXLDoP0^fD?3rED{v$3{W7ui-jW7W1*7e>N1yo=x^^iBqd88I?kd%pGc91 zhvd0_hoX!71zmi+qP~59clG*)=>=mXRf;d}w$P;{QF0M0Nx0ODyUpU~=wYd3l$E_% zu7uK2(Yp3`)6`_SQd~#>EV`W=+!Vi1CQM`YOP+s3of~XTRW!x^zI_GXv5Ig~M_p4G z&uU+L%3|-@Zs#{UWL;+(KbmL0^KH2T24?x|-A}J?R5HDR&?#XO?q`qA9LfBWF=5d0 z6t6-|Rw7Y^r+l$ul1Ldl(!YG#Z$tdJBr#3C%P=7_9SBxG$Yv!)64ec~ERG&`S!W(l zY{q|%D60mXv7*9GvGH;(I+Xph8=WW*SJm5-RaL#Oiw)>eQxUB_&&)NAY5TIX7h0^8 zCYr``iQE+TePd7=35Xh+G&(rdG~2qVS~pX4zLVG0q3^P;sQ0a-uJ>-H!-GHKv1>Q+ zWxY&zboY7v-hEfulA*=qrhS8l#wuV(3u&9IHkw+-}=t&*6nP3#q)poHm}>R1W`Zx;cf6ZplqFpACMGF^+ZxmIteAQ zmUFH9V1v7Yx3!0?X^rdS-JH2I;XL>Ga5Q-s%d^(ty$QxJ8hz@9;k{!pgl{)G*pbx( z3{=Pw%`%C0Ze*La9fmzzTX-I*CFZn*cQ|oxw6@xc6ij_?X^@B9u)6|u{M~;-QiWua zN_|nPUxXliuR2Y?#i;v_?|*9#CjSj?)K{bmi6!L(Fu(*vVZwgg z6=OJ&0}L^3Nb;$G{5P7?*I?vkM9Ir$5dUAyASL2>7=K+vEJ08kgK$_pPlAI?D~K3j zktTqbxt{HzP^94D4m*FKRMSgRjgJm`)`jII7e0dM^FEwh^x;3CyPyFiAjNbF#Q@Y; zd!Lv#2xqizgrv?DR$M3|^$Cj3%YI`#Xqkbd&O@lOXgk-d!^vMjpCzYm^5>DFyfD3j&J}!t@rvnG4MIYPVVII4%vUzhNJ|`76?VS*IOL7 zC0^?j@CZsbMVt2?k#S!Q!tl`En%u=8oCNHA0kVn|z=Q!HV8hr3lTqqn9?T3F75mIJ zjY~{Gv@@9tY{Nk9R~?hxpFBT=W(fH$_(?~+Z>dMUht9Y$PY{%eE46MA&0w*|HHPN8 zwZ#nun7+-bqAY(hKjmOF_NM#dcd>U2hcCDO2$Np9iJfUYYa7I2q}|m`7mCzvUsV}s z94?Hf z=YO>K3`RkfkeDa)D7X&CR?22o1(y_pt1qtcV-$g%2u6S1ltQ-P7ZD_sCtAGN3mTES ziqcAbx+P4w6b^%JRt>%4cjG3{WK<1x!m(CbpL-u+llX>wtwSMOUo@c`2Bk}58;WLz z^mKhudn%p6FT@qrXZzef5ycv1flXeQeYJOe7n1T0+$InF_?Ho+2E$;VNwt0z!Yj;IB9;NWT9LgqzO!W3xmV9U`Z2EbnXlLeSl-HN?GwrA-InF-|sy7*bc z;^5Nh@loO_96FL@)NbLQT4uWdcrLSixK(k^o4}H^K6$zdZ>Wwq`K&pCd<7Lh5RB4P z&CN(0&4^*L;x1W1{5DS+VijvP+>`2@9)F{5q;FA)Hh*f6gV6JsJzA)P2txuo0tP)v zK;e@7uXk7fzCu0G1p^5%LIF%O&WMclBCoD~{*^8^FbSy<>fmJYXh|z1S1^(+%Ej%~ z-(Rqu^W;!ZK%NEkntYF^NHozB&1%k+LCbB#SK`ItUNV(wY60f|F2!@L{cSOON?{U^0+5d6e^g&GLh(4l^5GCP5H z^VCmj-^Rl#9y+faX2*{5>zJ=RTvS${Ejkeq3S|9`(H9R(tMg=+M8_0~)01siA=xpd zVIwa*FiaWG#i2%$3*u8}#pliTKC`7gfJ zs``@wv=ag}GMDi)0uur?Fqh#w1SylQo}&S?vr?Y{0Rg+SRG^^&f5-Wp4vIn=ETeWj zIJO!9ttmvo;QkQm(HAe2oZ%WldldWzMlDjOiR?W?nPnJsMJfAvXENT#zB6+S5Ues$ zG?K&7ldf0#=|1Qb`S(LH2F(vW**Hk6>a(YM7HiLRN-Jf>j5-5?6Ls|{8e zwnkMT?3h#w0s?wqw(A(9AZ6|U$0PWJQua1=B^wX8_Sibh!0HggqN}#Rc9NyRe<(g! 
ztw8{oAO;yAq=oEWwnQ>+lVpNp3}N^!7_l4VP$Jc9UYpSje3aHh8Mim=aA?C zCOrw82ojCZ_+GN~DK4vmzzu(kdN6vE#heCd<4ome_=s{2-|_Of*00+2=-CA-Z7vmDSQD2C{xTlgTf;A^9V)5$YY0<3EEF& z&!q(@DL7!kKDJOCO`!|cHyrIIFzJy>T<4@5JMeidVP=kmCKcw=Y{05y&Tt-UwoO$v zbZ|FIgHh1#Dkm_?$cHKNia%9RKkSMJ*?=nGd=E=le;R8ljk%&l1hDGXNxyM);K^kX z+~F}PNj?$E0SvsVrbvKs3ItA_A0R-%40ZpmDkz)-px_Sx9^?!u1>6IX1Aqn`7@7v2 zFl{g(zpsZ)MBa2<_XJtiwP&3b(_ATO?gjiww6l>6_#`&7s~W}uqXU}ul_PebZhsth zZ!ma7f3B4@yltB&9!;V?pFw|YCgb@`q9d`S5Kil={InFaOY)|Ac_U%egr^aVoCsG6Z+UDIl}}~% zwrYyr7eb;~rI~rN`LHINs@vU}+zf3@9*s>0e}*3-uja2*8*FJO90`svO)f;6E7Dg% zsGU3vK-3GfXzs%^7chogz+?#pzoX`n!udO1#w0VjBSNnDQ@H|7gqJ#okrm)TdIcKq z>M0L<9Dw#%(71Rv4!I4(5k@Wi9=<8yoLn3SQ)nVWkh?4ii91}L9NhaP5<*)tDefQF zfA!XD0--YDRH8McCuSU5++L^#eR8=5_;I9tZ0ejt3spKgJ5o8Ua$5nXLTBO4f*Lya zY$ADehGdhgL|ZS(!y^n{M<%HAi8_u{OkPgG`U~+Wc-uzk+|^zAFlcn86Xet!KZK0D zQLe*Oqs`DooV*8sn@#UyAB&G(t|Fz=e=}V5(un0!cEk6JYa7h4HVl5o>P1!`Ij2%V zPmepfJ%~{B!jogA)Ny(Qn(1{#it#*CS$@RovF}Ku(?S;}aT%$5l$PjSe3tc*3s)rvnzLlc!o zZ&(;0**H1uoEeSdy zg+~C;d!g)U`1=J**cRU_IHPR%eLauM0dWTxi&2Kbm0C2Z z`Jo!V+JERoC@u^K0BJFZr*(_%8?=Qg6MmzwQn)`smBgbta?s4@4-gQZ5 zOh*<6Ta1~NBm<#h=2TOse`JT|#ZJwOmFC4_^P+hEE0*T2=gAd+Ds-4)av#~+HBWNh znQ|9arh*HOz=9~vvcnV>zLmVNt$&7;kL4Y7IvqobVF85LN%Jokl|#o5;=#(kl^yu% zgabdVgfbjn`}$6L=UpM9_(0Xlj=qN5il)L+jpPQL$9$Ptr7 zNX17NS0lzKF_9dpND?mf=wd#4Gv*{ZoiN6u@!!`)Qe^AZ=|t#%Xq>F(SXg9vfu;2o zUSDjJ&Gq@q>1$uTfh!xQTlaQ;`tIVV?|)<(x+8HExt{50!Z~5e_c_f^C!EFO1TLJ8 z^WAcpY(HY1?ZjShuag2FH@dSAX2}W*^Ys}NBpVmkDb`%A7mM{N*W=r4^&YE= zJJyi0&JNux`(mrxJk#PI3O<9dnvm{Tkv`C90-R5{R zPgm>0KS=PhOp0xGcV=(2A7=X&Dt>xzlL0-Gt(9SVeB)l773-~2QSkB0v_NNA7}U-% zl0-~EJLrq^ki)nDnNp;(0eW#`Z3|1=-EL82n?*J&aRc>#4kT8vGH2~yux8KF+>6;K zpt3=0nQ@~75At-GZP6nWMh&4{v<(K=f_+NR@rBbR=J<|txriA8#1?kBe-0JU+efUu z%vKI;w(MMMoNL!xtV(jcW<`!Kzhy!;jQx1Z#^2$S1~As!v!HXFce!e405`Ms5*xJf zhHfjRO7R?jI%js|;NdJoF@s_-lIztvt0eJ?>cT^-ZZe;Lht(|=iB<9Bhl`V6Ppoh0 zh>aLGL;@Hhr3uq&G+Um$c}GWccnHQ25u4Gi6J3skh=4UXlozAdC;vc#QFIiOSaId} zMF7{tY!pR?C~9yBUTn{QUfwM|5LrW9Ls7;h$S^8@CU^nmg=jzw#vBG7o8ZkJc}!Ro zm2LdRLs>RN4B@&GxDK=-h|#hXcok^DvL+0Tm&gK^D5BiJIDzLOJt>5fx=&4zkj9h} zZz+?3W^8FMa9b{za1|3R48YI@bM1qhULRDWgBY_6(r|;cZZH-2`+QK1;H@`Kz^KGl z6Xyee2^1uVVqPH{7y(W+s4{6t4SH)C=qo0XG!O5q&#i3Gt$N6EF{DjID2-d%9OD07 zpgTo`iUQrWTG=;#8bp#&5ISNXNz1#NAtd==t^xHJB(N z2@*;^ESPuqaI>H!X8cIp40)K4vi4^BpDa&*Ux8Bw89H60%XDR3SWIr=rL%97kXfG( z-_nQinH`WXcsU2Ge0S;B6i=(TR^k_oTcy}T zF!wjFO0yCB*{>3K7HB~d$*Z-?PXRR|5_8c7T&IzV(Zp12qQ1SveHTT z8Gc~6N1DXuNFQ&cjG@u;RxJ!j!#X$*AZ2OKAuD;9009!C(*Hn-)6;i1El!}{5Wy7-Uj-CnN(3d zG_dkFIIum8&0?ZyH+$ZiAJwNtPj({8k&~Y5j;o50haZa&KR{3U~ z57Q1xx@AQz{`9l1omRAW_}QOz-7R-LPUh0BN+0Uv_TKEOH83FdU*&#CX75LzJpHO*(+Y*WpDKyA%p3g%U#m2BVd z-0l#&860(Rd-{c-pZX#p2Yyfnq;OSlb9ei7u)^Wfp$0}n#s;ijIPhXWmQbz5e883 zvRnzEW#|gKW^g8dm!YU_D9+FPNJ&O0Rqc_&UHRO9+X|={0m=;PhFn`k2&tOaaT7hX zSgB}lX6xmq-tuF0f@;|}E=)qNjCEtMJHC+?1HCTN1g1HA!|{9pRQe6?V(byN%~D^H zToviIoH2n&4DTEuuo#TgILq<6%U#>HW)(6+K}6P3@ZX?+6^zt7Vx9-b0sz1Rbkr6R zIzUIV(^Fit??um$Dpqmh<<6VyhxBUg4vavB1x=s_4RgodAFsBiaTD*?p++RobG*;+ zZEJPZO(olgXdv0)L5=SB42Wpc{v%P65->*u=>7MpVJ@q-l;SX<-KiQxkdi>M%p=HC z{3^Yv&gpP})eTfUhjEi7{^_+p-7&AY(;dXHA4{XCLwP+%c7S0T*~zqLJPi|pO-A>? 
zCrnkcxxK48`(vcH1PbN0Kkc{uVQuTIN%@8&d?;~ge4_5qL21ZFP97N|w;#_4zckH-ab-x@~V4Z}vnP&YQY9Z&nkVwHNUQbxpj1B4=2OK!QpS zBuiV_GR*@-B(OKNadrg(@CF0EgGy&cRbBBUA0xl9$fZcSUaT%U5pdtCnJ zcvH`4al22*;)S|*t`WfU72w^ULfg1MlUbS=POPYApKMNZrQTM0Vpjr@>G3Pt%AcA+6=HE1vH_RO`I(@V9`G=uMYes41Nvn6P3NU&-2~V#>@17 z4~^e&t!@={R+OI$vX%WeSF07JiSVGivrLnm2@$}T#VSKs`jxHMW3wjk@vVA zB)bsnjMi+u*ezE%Gz&{X1i94lIGJtN_JAcM21rA;vh+1@h5axOgJU2VUweI&jztw%Wy@t*m0u);5y*!1>U<6yJqU9{w!A^Q^7l#m0R3vPiO3UW)jT_kCZx2*1fn zekifqAu^b2M4hIAD|@C$(MZ&M_qC7kRB3ta9PHyZUp5xN4l3?7rdm?4I{qJx%D*a; z0kjj7Zk`kaF*G@sv2g<_f2~(rZ_`K=e($fa;Kh-eVXnSZTBMdOsHj|)gw;xbkg+F? zCC3i7OP7DYXXcC@J27d2eQ14r=J?FH&G${HcjHm-#R+xKi{;6a=R$fcB%CSfEmt06 zltdr~N)j$ZZ<%=?d@-9d#+Cn~PPXfd>+{8IE<)u$tIC;Reyjg&e^HT?843&i^;Vau zo_$)reDa)y;FiQnakB&U<~$%gQt119Hdmbcw~eliB>ZH%E%G#JbMS7L+I7<6QkK^` zoxzU9cU086$aRBeXfB398uezzL-5|>TVAHcZKlz(UAq8Pv=<WufA}9MlZ8GrAN3XQo|ns$uO|#d>M`)f2!)!fh>}3WUb;E?@QHdE zEWY$85pm?*SVIBHYl9NEG|Dk?AdfI~$e^4(baliw6PGCtz(~kACJes({URP<TzNT&|^Vje^nJlWgzMewABA5U_4zSNf7v}xgXNv2L5$_-rNjscv@GU6{5JYSd*J?6t ztjoN08G)(>&H#MRoMnYt-~|+HD0349z%vs>!5EKlf7*n33bv7OyJJBk;dTXKzu2Ja zi*4-;(HJRBN)(c&$s4Ck@PSNa6+@=NeQkUlfjEXstIakq`t~A88d8GR!OqNl#{3&p z?ka9w0W&Lr)KQ|)IWrYJrZ^r(Xl+*JUxkWqM%Hy>V>61<@*uEbpskda+IFMxXTop1EF3L}&{xWjKxkmi=ml3%$jFt;xO(#%6(6ah z{FI&w5Lh3yMFjLk2tiAGaE-9iNqbx4Ee*?{##c%6+H9fItHHHT`J4bwqiCEC_v+%- ze?_B@22quGQ@l4Eoz$J>D3oT({bOBbP%^i|1&P^VAwOTedV78O>hksSeA$=p>5;Os zQ+Y~%wXCbU+Mx8U8*KB!h$PgN!iAQMvz+WkH~wz)CrjUzV4oA_f(YOk!!;uAIB^O; zV15}OPhpTpFy2vu@}!sf7iK_ zpzCd(8m=pn8dW=V4IR2#?2Z7^w?_B9&TrNhdaXXTlxdsd_8;}+{k!XXt77sqkSA&M zt2O!ae0e!JJTcq>gF#8`aCBF)4CoY9=KSGO?(TN343;_iO5Br50KW|&am*$D)m+RS z^})I9x3?R5yEr_}@M>?M(T=svVQ+PZAP>wb`kkD9*rX5$FjPO&B)xYX8|1ftD><56 z3~^HRyA8X(dUpAGF_~Hrk|1P1N)7G?@UvkICc_zYr_&xyd(RjiP3YcXh8|=j&2bU{ zHGyl_UOw=9_CICYdjXRHv=jj_mjUeo6aqOhlQ9b@f7Mt^Z`?KzzWY}!(2ERsX_3^k zJvnWXI%t73>jDANAYfLMm#9|SQk0$a*LQ|P?XGOuj#2c`6Ne*`^We-kL&b~hMZ9=@ z5{L1{)yerQo-a6OWtxh`RlVS%UbC)u@6@qR)eA+|v6BzIP}XbhOHKwg-7h7Dt_>rKYAuil)ap1NW{ALF=xa)uo{)o z2|B3$d)%-GuW>womBovam6^zVHiL-IeI8aUC6k$ie`7i0 zpcvsHA|VEg%t{Dzh4+TQSi}zkV^$o8WKUaFbqHFVA3_vo-Hz^R7VkpfqEJDRB@0eh z0Sv-WARY*dEauT=eP=CU9#V*T2m~YSA@A1o8y)1iZoh-%!Wlr$q<~kbgiP{5{go!i zektV)Bw;$nC2pFFCmutEDxb#He*{pKi*cQYZ4|feNRsUuV%%T8-E(lOzN<5Mu(-j64lQXDfdTK$@`mc4I>En2RC2)%3{O zFFyUMoHbuQVG35WJwq&5n(VizzSpBg5_+Yy%%+g~nJzvNQ8iEpp=s)d-To6PD;|PQ zE=!hWe3bd&^6bso`;W7+f1Wd*?i(7@?gmFED{GZcJ?teC$k~gY{G;eQ)a{{h2wn*Q zen(7zud!*MD=RWq@N9Z#-sh&hhUav&?8uk(u{gf5Og19pD4jl;I9W|e+i15M#a*S;?viD zG6#j^2v`;Io$tMH<=xHj@z(hI4Aj9`nEdJW-8q_>AfX|D>S;+k!9G& z$npOGe->sm!Q0IRz6B-Ybl&uVIv#j0OK;V;C;*}0E4#!P!$KY!W>Ca;4fykeeyTu1 zQlSmMI|~15q^w9XD5H{zXDN8u*4;OG$PN9dp_l6;9Tc?VMY{nLo{BU6n;C;&PV)#p z)vz!`P#;1F@Z?!)Lpo6UJg)b(4)?n4Y|yvTR8E;D*g{?&GakM+st-Fz!ZL?&2JMok zwaX6L)%bJO{H>o>&_8|P8Gg@CT&5|yC4D4yxMXE6pOZR|{3RA;$)HPK6eTONkoLTo zG*F<$KY60p{F4E+6q7Q&69Y0ZHeq+w@7~1ebgqh-fflDpZ<~9Zbn}1u z;pX#=04WbdAdFzC6i%Jiaj@Cn{QN5qav1zR;7p}SaCB4m0qiF82Gh)h{J8i&)0tStUzS8HV=Fg82v?tl39gEN_*#bxe-5R5}aqFA#? 
zsh3>ffk~O<%KLDxcCJ%KDu}fLo#ubVuqfeF@-w9Z{>7rwiJ>?!C@=|2xDFZ{Y=6Tx zLIA2!5YrcJlNWYfDvY#baTNUl0lfB4VBvu~^4MWTg3|)90?unA2n5~p!RB_mT1PSp zZPoAb@Ik>E0+)h;ZqmP3dD-rawZ!oBTw zb>8|;_Dqe&c70P(`&e{4Y8`Bq7u6$mHue6{cUf1|RXejN9`_wKU9Gj8>{-{tSw=W^oez zu^BWmbecg?#Ekrk3_>;0%l?0UT?YOC0sQ(5ioki3q$330Ra=ib>>;s2&j6Xhr5o1na;-hFF9RD`;|0dkB({0!%u(UhY2lqM7YVy z(&n>)ZQc06tipxQG+K8%Ol@qA@paSc11)SEBe2!l7!$th$Y&F=R4eeAZe(XqZhzrI zR~E=a@i+v<*uY4Eq9n{$BDBL$)=_J+Cr&*UWk~~lYhNKxxdU;AY|v_oKTg1t!6p7= z=2I9hO~St;R}NsASh|0p5-$9ip+L+!HDQMK=vaj{2oG-92c)Gn&gG)r(X4r_VVIcv zv_n_ZJQo`jfSV18vcsV)HraZ~UVi|oOvxoMO-vk5(2-pwSz#-eALU`DD-J=alN1jzxjrlC& z$>9uc!4W)SLggf@tBs|pg59fFY+4~xlF}TAPlg-qE23Wl{>p9NtwBP zL1E!1Wz^j}8bbjxd%P(uC~|4gE#UxCq;i=TC&L-Kc4M=q0TfVXgMjwNma`EXk4@bl z7AAD{qs4z43h5#i;k$~&i=_Wz@?fEza5^g)r(>}TE>Xxp*x1i~(I8Z%+k2P_)kD*7 zIuaufp?H7-%BJUqLnDS3164fkD2o0DLx!OCm_N`N?iQCjcsmk@LhE$5zC~=#u2A=a zAg4(%T$FQ=kc_S;cVRm`FWo&3-JFO5jv+QY{BwVe%y>i}?34R#kTNBu!1~ZQfIk(v zVBu8f*>h*wny_6H$7arIGQ09Y- zGC{i*WReF5COKu2l9-~lkKpz6?B+0g&7DLP+H zDyV>jS)La-^VU5ssES1S8D4Rw!z(l{ z@v4VH8C(9SsPnXnAcxA{`f1yy-g|ZQ=lE!S{eJOOrJ@;CfnYF5jUOGb?Q;ZL1@V7P zJ>=mN52~S`DH+I+|>#ov|O5@4b3A}__5^mjbr>7SMv}O*Wz;S<# zO6mJXX{j~^xo6XKc&Vi4+zBHdF}F*WRoUK_&##sePHPtFWSVf_e5{_baxuqZNHKGw zhTA25@E8|ruFm7(bqR%KwO9ec`<-!>{*Q`Z-~Ks`Pm9r*my^lztqDVa`>&}3=wN~FgC?o$?^GI+ZA zCyO?}P9<@ujGK%0daILFUUc8FfOBPx3yraZfUmbkA?fYfByP2@8wO6WsFkmFMfJj% zT2M{i6?N4zva5OhEmF2B+5_ntT^Mw(yHa6 zCUhow*PaGQBQsibV5VSI%S_LL;)j8Oj>0(2g2o5?4`h?j0XFu1eXY?~a$PfY7UXg6 z|6stkR*teL$ighvF$Ed$(F*P~ptV(FX#e|5wTB3B+R^Tn;s|%YMJ^g4#n4}G*E)vv z{+{>yf+B|Cf4nRiI1M3(n{9nO^_|4qG_vkLxfCoLX3kMaC!qqh*E#{q=(BVr4`qN# zrsPhCS&|JI05QMy7|%-NKmdXrQfQ2SWhwr3O7VZIR+0irQb4h=rSE)ryM`E9J(e}G z*kUUFN4U7vk+C~3_ia1UELbNIgf8fyxFnEdIb5M#f3_<2ti@buZ98n%Y zcf}UtBY!SEYdw=%v<8Q+wXvk^dgrNnk}CkW@2jm;Slk|Rltf%Zn6}snga?hs5TY$P z0@i^$*z_AijV}cv$H4kh*3Ws#@SnHNf^iCzSy)vy1%DER%uMdS+M##QhU72=w!jki zOc>2 zbcf{xB7gNzP)Hv52;PNj>GvH&Qk|$^fwW1aR$unakj#uUY-YN7V&(+_Sq1AP4-R?t zf`&o_GXr2S6~a?fF-8&tYO5fo@-b$vr?4NYD5{-BblX%u$@+`xtmA2QE^5d>s?M0< z1yyvQ7*iF|SnE(4irgLw;WsAYjzjT7hr7d2o`1-@0PPA&x<<}2lJzfpzfh>er2o22 zAs`zJeFKH@8{OZ5nkJ4Z`T)u00HmLU@w~87tU0Jh#mfOXbWCy94ZiIg&nMHl7Z5>n z9wlPmT;%pz^Kl3yRhyxk()V#R88hl?tib~^)=(-oS7&*w2`^@5ct}o$LVpNa zNKG1=O_q!~o?IT9O_BhnW*8)YxzNu=P<(nUzJrR0Ia0cO!wuUnMf>4q7(kH;uHs7s3GEQu8JS-@hVjd6?*NuTxGvd1O0WvRN>IN$8QBL#!1Ngv=lw&xS5`y?+ zz9>-F6kkyjC?v=o%6BmvQ1~ClIS#WdRLK~KKP)|ZlnJBrZ;tPmxF~p%!+!%;NFJEP z8hB1-Fh3DR6e<@7HZ&$<@^J z=mYi8uw&Hc)ow9#bQquA_V@-Llcu9*tG4s`jz%@J`Ug0&c`aJ3C4WXMb{q&jcTl_N zp+tu&G9Rm8rz#NlURVbBS0s#`y7&=-R1n<$j~=WKruU4XCXOGd)~%29YX(Zmx+{*p zT?_i`068^(c#z_D_4fyilFiQ#k8kvH993*2(-8Qd%Qd3LD#%-U-r-8q4aQJkl-+tj zvs4UmkRzN&n8OMkz8E(H7G`-i_QI@v^FGKLd6)1aVj&Cn|us_Xcv8m*p(re~t@ zu>-Sc-yLuY2l-;~C*kA=4RV{l)e%w1`nkbBAktyjt0c@!@Iy`&1#1AWv$+)!HRhZQ zag32x2`LNHP|Nosp{$cfGHwUAfG|hK6mlib5LmZhh~N-J-hV)L@!A|D^?g&cU4f&Q zQp4D%wEX&mNbn_u7sD(fe7v|FFz3kv=*|yo-jnU7=l4AhqZFJGXXVm#JpFf&Wvc?G zfFrhOm@CG2HGCNF#!XM*2Y~eQg8vU8X4>WHdFdhy8&G0s^x=|;HV41>i_(QU{h$6& zV(T2Ebr_OCB!3#eS_UvstR5azSOz*q43r$W2+2H$AUPPR$Q7Pp`4fVlPg2C8{=!T+ zS8EdAHjT~?wp2P7BE@z9F_}8gu!w|Yhsf#6yb&UiEFA&~k%e54^M}icf9V&}=;%Zo zEzj>guano^be<2CgUQK3b2c#KL@6`V(TwQF&CP!SUIV=JWRn5369P9lmjUeo6ah7t z0mlO=f3;XkkK{BEzVEN-V;W7H*l`kv8!NF32qDA_CqS!o+C6PZ$E`S-2fv;wm-Fh$ zEV~F!vCHLhxn5s6@#cOLZ{EL&ufA{Z-hBCv%MItONK>)7J8rm8OsHa$rIHDmZ|)A8 zU!&h*o^OA@`+nRAyYj^@&Qt;`p8FT4+ucFKe zD$?!Qblt$)*}zm)+Jz1GwcVFBjUTotN-*WV){bp=sVt&z?N;Q`u^RpKJD zEv?uK9o3e>_+Atm9|x{8bxj)rUvn_c{mq(UrE~om7Qx&+n=61*vcOA{D5=;b2eEBX_^N3G)%;Ic}`Sr+9-lk2^4kb*u}=yRNm86e|j1`r8C&-es_aZldFe+%Js 
zA0o#u`P_u~ouwy`F+G>IPA6Mqj^&VJU)P;(8?Y256NEb08`C9KQDx5;klJ@+pd}n^ zhbB$L81SA7H?Dki!x&TDbHesocY(uu0=`bK`oGqduN5LoqjTA{ra~i#ECYiDGHFe{mEzNz(DH>&}{ zMZ)5w_^9Arl4dLx`O*}N6Yly8hQrzcJY!1b(*fRRXsxx#7GRRAJGA=HS9(o!u2`YO zz>WDk_|!q_lr82b1%XerYsU{R-U+~+jITR7fdVx-8fV_oIG8z_k;J=`e_r);&RIN} z;2%5<%rce(^8j9SqzO7)%C<9A4|jx2pgTyxWN~$gM@p7@=X&FBBhwgj00-RHCK_G( zHl(f7#N zAW*!4@%sV+QJRMa7z_vq!zBVj4^TwMR%?kyf1s51RTf<5aWIHY5VnF0SJL$qU&jZb6id?dbC7JT+Keu_iZHK% zzQ~ogAA+mzst|uEP>{3;6eO4dqRET4@4{Y7^bX$uaE>PO)L^cJ`_Lt>nE@h3EWqms zQfu?qI(X$Q&#%I(rk4cb7MdQh>ZH*f2{dF_EO0REe{nhse=SFpb2<1rnfiS{!Js{m z!FkM?xW)@Cw^FKE#e<1$5A=8et3Mj3b!a|Zz#I!D>Sb?}GnIW(p2>ma*6#AK_T4vg~{l1=0+De_OK z5`yt`^f`6@e;92NBeTRflNq@VNy}UKC9EgI5_X{nqI-}=df1yS{EY2JtfF`R(b19> zvV!GT?sTE5R0fMt1rx3oa#pihah6S8)ql~Ja-RGjZRzWRKcgGc)p87*pDyl7FAG+(r+I3He(IHu5{Q6OCb1W;ade-}tB2N4iYyIH1-B|HwHmOzC4 z;oqA$IwA41eQWZSPY?o zh4Ekmf25A}7bhn`qu{#0XKCSJY3{<@g!QqPz{%9-2PV7-eor2^DgDp}kfo1T-`w z)BC5lA6&nGUFI%{6-zU<%+sqJ#~+Sh+IQHA^<9tcwr@w(+@Fpp);E~>YkozRb2+xH z=;TiOmo1FxWr({WTGx7hN@0;q|Nj(OI^n_6iC9ry#!2+EKGI%6sX$E*5%g1u0UqRZ zDuL@12znHJB*Lu@Ph%tsCYBGv4TrW=@&YR$R^eb$!53WL+`ai5zc=)kp@Rey12;G` zm+>z&-v(h&f5#mJ}ndro zvcR>-BCAOtYwX~3lz-T5N|J0IH7%q`p6kl)T_bMLz?75n4Me+7Xm`hP;C`AY=_dJ6 zzk+#$uxMQuc%0)NVUdPHx0(2n`()wL(my=q!7D~fD(Y*==innsv}Vx7aX+R^U$Vf9 zS}E9$)5hjFyRnU>&mzC?dw1vwV|&XO9IJB#Hkqm@aJQte0Dr#4WmPvuQ&Ry>BjR|d zum)A|8ULW=BLAeU8t1~z3VIqNpuj_!=cRcoD?ZxalKWiKa0gDIi?XrPgdX)VO?6#A z9CJH2Rt9tUF>iMkU^9S9uT^$QS5;Y!89KF&rfW75mp05*C>V!W`c7Pe9heBjS!BW{ z9*e!06KMo25`Vs<0th+~?QF8Sy}FujZ2Sm5cn9nvPc|nH7NQH0$b<+il<3Yvb`dg} za@Pcw4HWq4GrD9_lRRk>r9hh?ms{!@AfC3LpK`&Jj8BkP>$|_su5ag)X(-~rouO}M zuYNZ^Id+UFVqVZ4qtbtXUr?;X(!D=6Gyr?!z&J^z!G8$@KX4Z&6yaC!D1{z{oF zZ9Ni5CK!Uu_J=%6v*vqN6~IXz|4~*2hLb!zspxjKSS_}T+4bV@^VfIl%Xjmu?K4bV z-b3sK?(cL;?xqbH?6I{RDGBU(PnOo!yjhVoS(3wh*}w3`mJyz7(!~lay7&P|GYd2= ztLk`YUX#V;6My^Rg9(TdmLiKI+klX)fTpx9z zt1Kl9L_fw%9pU^!^Sa#Im~Q=B57l^|{FjDYtX|K5dj?YAR>^@iOH|uuocM-La4{|O z}oioRLJxv>VE~Ssx{m$GZbV@m(;bP1JFoE zQE;y%6|(!l?X!Xw5_;rWO+o&A1-)J`XN%R{a+RHF5mh;tSv3?QEebQ9f7Yi-h>!JqouL;Pm9OzM?aEcaxd>OFy?e^w& z3m3rlZhyYK+5YW0+B-H~jP@ZWd8L!x6XEOl4Z>tGB((`}< zg29pR)^F~X^X2;2XP(zy$`5pez22AA(@UcmqPP9@<@$E@dh>r6%iVR`59Z9?NWBOh z>I0OHgGE7odSF*h9u4vRJWV`_>flcYCrYO1YH+ly1- z27f!GHpR8cIcPD2>*K8Qn5B?t>`V}E0AX8>!7Q#ZnV!9ti7Z4UtaDL7LQZU;;l zw&^Yo>2|?dSS4871OYNXb;7|;UPFMq(@@tN2;7^yXonbIL}xKB(shbH%ilQd0s`T^uL zFqv^t-^EMR&`6Wz{k8-QEb$b?RR+NsLyqtW=8rkxCITGezXjsOE|>XE2y~g%>9Mw{ zAO2zv3P0$s@8+L(+3;ryvym7iwI*V`I>5ZB%wvmN%t{0Md}+kZCE7*>Ns zT7&vBz{DE9^eHQm@^PWnItX=@5>2~4ABs_92Ublwi!1ijdWW~M7rSrJuiCBjmju;_ z7z0RtEQ*BQ#BM}ZQM`QB2c+s!N=cq96_MpR1*w3=p*SxmzY46F#k@-^NXw>N4Y6Fs zAP5b~jW!8G0XfNl^}nu^&woi@uuyiSPX~})D;I+WEVeH4C;H*kBm*@`=E1Aq=#?SO zz*UV2%kVm-AIu%BY_Cr!On@=B>~*Y^O%sez(vXyiBV!Ai>pZauXxHymZJDLS$>JnU zEu#?|cAg+SWpYwax(`Q_Z@|cAr!X?vsV6OJ7<8$KCwr+#E0oyRPJdB_vwqrOzEfDn zF`FcIKqh!K7Vs4^3P||@s&RzEG1rT`POZHYJZz)G-_yroazzhA%Q)B^P=szGYc71n z;2U$FSoNEB?=P22n=FS;2=4BgTv_-mj-djBDH50uij|7UTg z*d_D@+%7Q-0#0JSKM4?m{P*9e`Eje00kjj7q0kiqFg2HPo&qY9uj@F+RjCDec!EB zy6RuCgmZ0lCRD0N3Gdvn-sse>nyz;Ym2N1i+ViGsc^&rQwk~}50@?JWUi`?0b9JQ@ zFm0@Zk7gpACsNaRDV`SiP6I9^hFbDK#lduh$Arxt6U~}78EZ=0xA|s zmPfsdc7GyU@D?CrUe?!W^+c|x1g!yito<7d_}t2l6-T+q6P1u(0UafQrv|N!WMA2K zExx@~cD3CXJwyKlAHQK&p#A&U#wSQJ^sn_sC9qyz^1gS&v2PhJc=1DDwSRXEm(Fkd z>h`oJ-$Q_X522^NcllPPV0)2UOKzSD35MCI6l|wh{x`)I!(tEaD>cMsaoE59&zD!c zZQQW$K+bek@lG6v_UxSI3*jtf6cucsT4ELCQ0$ywEh`Kwr zTUOQBEWDTJO;MH=UKKU(E6}HJiZk}O(z78DlwssRgmuXhVKOyWW!U$wC^x>?;pbVt zYVpyIb-fu}-&AcefYBYijmhKDZRARYAT)TMQ@^ga+9K?^t<;PgphM_@V0y z)Yb8x925~7G7yFa?%FN@x9qmO+4ar^bbJf^z5!Al;gr?;SQ=Ndhs94_Yok@N@>O%F 
z9c6#ucfVHnjATnX7=JoOJNkfM1Z>c?G8mz<-WEy=C!_ZRwG@)gfUai?eQga9`CmLm14C3Ec0Ca19>h-BRpKc%TbI z!1R#DH^#)Pce`-H3nb9K`p9ruHLmqsXo3c`w%e}9isQB%A9?7?@bk(8g)Q2DPkC=f z$IoO7ZpJz(8wV*JZw_qV4^O7-@Ok-#uXZ7Ry;ozQAAO(@qWUhb@Y*{kN8qg8CEuK_Z{(O*% z((fTWAAg6tNri!dV5TSpFwlYWNp2G=2Lix{v$aIeK*=Z@|y*z$%_`u>OvOmVY_}XybByD)(Qzrdv*-0dJ<#uB%Hj zy~-sdjcP1uDxl#Yx87Fv+;u4HNw_DUk zL9tZkllmNMp(>m@NU=0fLe~SK2q|HsjYtwX&d2uuoR4)3?HKg%f5&``v{armS908< zu77@YivzNSY-LX>6LRJEJoBE5;DTMD#)5Vlrw%1#lAg`GtC-(2X&}!|`Sje&X_KB? zINV?hNx@;DYIt7Rx8X93<#05eYhXKxJ#a5f08r`N2V~<~{S<=`I&C`)f3*_;Y21qNTN&TZ3i@Y=6M% z{kXlS9*6sLd=JC#!6U=0T?_riI~?%i3~05R+i{k`r}~)(d$?-G(H1*fI~&RAdK@os zQ#;yySaU$&5jX=*FfagxgKgJ9_NS-zd{X%5`^gX(Z3*&Vb;%XZI;3QpKcYn^MSFgE z_76D4ITcej!o@9=ypC%k;8EdB$A2yj0Cq~k1gY6CH%nD8G$UHk19%2Q9IAPEsM&My z2@X=Y?u&3RiI;J1)Ko757mn=3krMvdt|U5nQsfvKUhvvCA6(VryzC?B@hHUQ~FTp z84lE~KLC8Bnlh6Cv=ah3F_-Z(0uz@!z5^?h!Te5tsmK#aAcc;+yR|1o7^JX_bri@b z@$L%mogXchLP+huY4hFY=D)9Q7fY3B|NHyJGM4`Du0V1vekSPD&Z1eWO znpuWo_E!oV>u7kdl~{AYG#sa6 zMcJ)dvjcu}=A{)b9WGf%MApId_g!f@p_$;fLj}PFz`RywePCd->rJ!GEB?33+q^D+ z`;zpBM>n}8S2v{H0uSO1!G%PSs5A2cJCm`0KXBUQsOe7zb*Ny~tQjNc&)6yC!iEy}o_~vsIRS-)xJm3zgl`a0}Y(+~2_MpUxjzKZA zEM*j=VdBUI;6q@#ui%nOp|toG4Lu9S0wsOZ@i*tsdXI`Bt-wEicwCWAn)ai`b_n*= zSpT?tw8CqIMEQ*f(hsKJSWDiX*>2Nf2(;TAu3^V#6~7j6Gt#g(3`Go*d)q_1)DUS7>CRCx%R?LlK0+Th&+Zehs|> z_CWLie877k%-U0F5{Lv1rSfiAuYTmT83TglXgZ-NpQEnmKv@ zA(&1GwPOK!1OZN$-2?$f4^S2oGZ{^gAvXO9cbA$hlL5370XdU#DioJm1pzsKwN8A9 zlnzCK1&coJT1&0;BZ625Em z-S*>tF)bUz_wq!^nra$o}(;D?5ZUfBGQS529KC>%gz@|snr3`c4d+tu1rOwV4FREd!Mf!cC&i` zL7Ao9&MeHQn9167v`3EumRf^Exh7I|=s?Yy=5(vP%3FfnE|Mh7tAWX)VKP3T89MIwkzi-bNyXIcl??W%H>_hsYOuX5M$YMdiAw}J%D z1Q$m;B1~G5;aJ&w08YBA37R|Lon@n{fLSS|b47q3aC8Fys%cw)(-Ew(%;Ci53zpAt zV`nNA{(zq({1hkgoIeN_C9&{U{^zn@RYL(3CrpNa?fT-nX{u=Zw>bx4Tt_kdQtOsjZ*j_DZm2}s9~eev_pA1q@k zMEgXxfu*Y1fYVoGN>l0SB$C+}%P$(sOH&yP3tDp?_U&Ludgv(3+Zy=qIbN&ABSE=I~oF@GY2MFh&IP5&nbh^AQd*Y)jH?=8G2_{ zBIHSOS~)U}fza%gl%qMUAAYGqf5JkY){gm0zck75C5OV^F^jFAW+L!N<}23vs%lE=_|Q6eI*Y~A4Z-P zvy$J~^p*Gurtgt{8`HNM@uu%o;!XdhI{7N5?}_-@rXNF~7UFeGU%;(Ossl-1{(>RK z=A-MX!FmSQa5?MXa#Wd@#h3SQ-@YXQ>qO3fsQbwDm+$NK%my4wNaG~X$P6MLF+E-> z|Kjijc&7%5C6CSN@)tBF%Vg0EaMN>1x9hyfl)f>S^EM(sUJeXA>TOQ-L~mm^LCL) zUkSuMRluOJHB~_^TYHFYX%IiY!dGsFW^k)ovmp^e+m@x^dt+{z_5n+$SbE}Fc!}+% zovRL+`}}5C87vrz#1{6LuxeUj3B)OXV78uvRUg5>uz<>vQF^OpK>l!_>#+_AomFe{ z-nbErqWIl5e(qS5!_E)01nC&|8IQaUuJWsXwrDI87Ac^8A_=dWjE#ae^dVr`(YG_z zdU;T6H3djnft1g2oS&dA(-BwldDxUsEkJZ56XHt8r;Vc+IB?RH`7$5e#z1vzLqEtRe7e_}#X$^B3J2@p2W zC0J;yIapTiJsPW6Xi)@$# z)wt?1v;i0KGp|3 z`rwWg9a2vdrS%~}In_}6O|FHr|@K~v>%wg0$wz!!|+!;6U5p+s%6&67FifknipDp0{rsE6- z=nQJQuDmWQu=27mtsDRWsSO&YE-KS?(HxD3{*}5Tkf*5&g#@(1N8rH&igA#drgXd9 zdIiEPM=PXavUvYk)DfV6bX(=tHy#|q^{g8@8)a(d`Gp}WtmIRCf;l5b>^w3^fPq7G z_DzJ32h+*9vm08mE$2veveYRgpJ4UKmd>ohXeWY>Kop2zUXk}Xf%3K)thfML_88@< zjdpQc=Ce;45Up99_-GC1?rQfi7u5yi4~!o@V37FTjzCaNbnNJV0mDOG1cy{32Aq?q zGRYt?u;eS`@ptFy&FPr19%^gfZsFaQTo5gozO^w$H^oyl6o>Zub-`umbHX+E>Q~#G z*$4ik0EAW$Yw{sTAlIqVXwdw1fMCc_iq`@D*e85nO_VE zfRta9yg&*ak2&IhLCF7pCY5%{FJwEB)=$=#c`TziOP;!J2?;9zsZ1iy*p$-Gm<`X| z`JuVL^!qS^R}L3j9~;B*GttZy+hLtviS8XC7+9aBq&5A})^5A4i{W;q1=+-PPvjpX z$V+D3n&%0!lU^*C6WV4vO5mdY0&P$J&@r_)e6!?Q1S#AQpZxnCr?(8h3aI!m|65ZE zlW;>60WgseCk-|^3NK7$ZfA68ATltQz!d~4f4y4UjvKiVeb-m;F?pfLsv?U87z^yK z;Z2YYu(18GiQxx%tZWo}N5))&&EMx#kH&7PB}?6I8<+w6Lb8je>ePi4hZHnO2~|_D z6l$YD*;@&mO2|2I@msWpUY0|lhWi?7RkRvfE{ob)utK;u4Ds>CArt1!_#>Yf|CzZ2 ze@(cM;&U~;#b-59$eR2KMr-jW7}Z1;nlOzO!xIgZFhY|+K;fYH3XxVkX@MAmX@=~D zGe)58-r%B=zbS#V2re*96I*D;G|ljY*ca;aS>WuWCU@4cr(rGoBb@LVQ?~5;{{5e@d7cPlXwySb~W9MGI29R|OXZq6$W7!9vkb 
znqnR-wxrAp^48x-3MD6hQ*dF53x!$?%SX%X4+Iy`rK4F&!y2HSQWYzL27QSxJwPf8 z1-zxXoIs%_z~D&L?5Mk%$ovBO7MHw0>BS|l88*{(hB;^j0{E}Z<*P6RQ(kDrf5LJG zz{OSIZ<@mk$*bcdxa;u2XH>=416a*P^9R`lfuM@@R0qOsMXfFr%TNP`xC<@$Na(F) zmj-KY1QS!NxiR#vu;k_w1IE@(3|ns9#4zIKmaMSehQ|xQ*cvnnJ+$WEZzVL#=wZ$d zC_n+S<=g-&T3-b$-UueBm>M!=f0rzn6d$C)0O;b$lg09Bcm=q?w&!8_>p%VpmZaDm z=UO9y0N3~L-@jgb_uXeZQ|1W$?stB+y}k=io`mHyAW|NOJ%gne$oX&sP%B`1xN#K% z!QC6bsgG}5mD=54^5ri#+p8DryYMP3e|h>WEMKnwxC3{s74V(gO52HO;dPp4LD((%-Fk25^s2tfJrd9oR3z@~7>~ZGf)+bhWu#$G@#VT(56_ z+Pr_WzPY26^Pe5InPf`tuw%oLnIIP&CD#;FIW5l9)qgj)slM56f8Wg~2s~RX9$0q5 zNJ5=x!GPv>5Jj?v%bGd*3s`ISM7*%L(GjCU%!N_`@HMZbC0-tTJ*|Bi4G(oGf6)UPGKHAQbxE-z@hAuxD)qCvEC;&u6zwv!|p zDIPxU;dHzg>?^b)yUlzym+4u=WbF>wn8&fP56A>_?;G`{PNDLiVDWk6Fx~{q5x&`&2!dqs(DgHZ&}~ z$gq_2AqS~lp2zKXHbx~YzPSUfoy2v|HJ!$22G_(ofQ8E_SbBWhnS$@-JhZ%PWmf)4 zg@waBh+@Ljxk%SmWOucEEB-O66qEx+Q9PsN)MA65e=n9JQkichL~YT8f;e4$m#vl9 z%y-f_%K2of%qa^M_>_Vg616V3u0CSaa&|Ek&K0gRCe?c^nwvN>*oE*d$BD#Yo0A~E ztDxo^s|Do^Y!f!wZJ8oqm3G!Dco-UFMBTdId6%&{EwbhDI-EPvmSHL;GBpfH8Ii~i zJD=Ftf4pR`a^JN3QTTM=MgKx{ELi(Kz8r_YpFQ^?_f+`Hu+9;i3V%r>tg@sT;XmzW zX7DUE4KONgs%I&?5L{%}mR(zSt||z_0kbP##_Vc`%&s&ia|wB@pnS}^MRyWpB_fZ2RiMmFG?Fc@ zi7F$!^IqpOr+!B}3*`5M;!9k4^^;jWy9?B_i>V6N!9kyD9;NW!vUS8k-;vJa!rgd` ze-6w1Pa8v%(Q8BW>QuVuL;TlW{y0}HT1!lD42IeV;^vkk!d^`kWUAjb+_i)Z8?Mu& zOZvs&LQ$kp(nzJA-wzrP_)*DoZ!}IdhvBKYwmj7wTAK5G*N-km{ntD#7XOip8S)?N zVro6HN8TM`-hNnL9pXObKwsOOJRctke^38VVzQQp8&a)Ib(yV>h(WE6h(X6X`zg95 z{X+jbH*IWKay6R@sEs+gu~|#5zMY*m$Gy6C4)lim1$Ng{WOHxB8XsugE!d(Uix(uKBY-(JP$H80L8YX1)(&czyV#aOkZ#QQ< z~4w9tWp%L{JKvGeOs)xKVbT59*Tc|7I60=Qts z0y1uHXlnox&QltQ$Q?^hhu3dky`#?#I3R{VkC1>#vOaLl-MZvpMd>$3ilAs>@ip?e zUWX=@7Zna^wSOx;LA|FD!90>mvwEWPf+pW(Nd5g97Wqhkzt`>T8sS9W%Vh-7eLIPl zn>RWhhofZzoEb`pU;y0Ys?N$Tf`y$Ni6PqcG=kf8s6>n!Dv^-o@p~T2xYn(urNm@c zIa`NTh}GthlccF~k_%=5*Iv|cT_DFw_s?s`Yq2oxQZw%wuPVZe*r@p&JjEL2?M-8B0 zbZfD4%74Q=tGx|qphX0>BP1$uxgwr$=7Gs@5%Up z3c(83bQwp=1VX;_QkwizUo`nKrpJBLERzkmQxn97wWe7 zcV6$Y#ya+{J}rm`h0C|sG(xJNiBrkXaz7cD9a?Jg zw%Gakz$&F-$TY=WU-w@4wyfRh3_1NyYnTpkJm$x`d8Bz;FXb^(NeK&ufGQMaOJPUZ z5>}5j3jPCS3>EVp0J!KjXhju2$35tfogNqcW>_M`2r`z(t2b-ZzabU!hxB-IL4TtZ z_tnmxo2qj?weyPZ;4fkUZwWbd+05S3Lds3clFrO;qGKk_8GpAm>hiD%e#0Vlj*>m9 zkDPaPzb*6d)Z|&wLN>xxXt02f%LNbg4d{XRFMI4(ql((e3+i23N zEmLgtZkoEI37UwK6Hqw)GZ%JAJk(#9K||JhrY;s?L?rdt&W%;^ro6+adXcr)zSAsg z%6^uLpqr&MkX2|tJXPRfa9;CdJE#D8n>EF$bAeeg432u-O-{?A?y z@^*a5s}qf!OWQ7`5c#rXE45~cRnu%cG}10*0c85DTS|xjxqtleBc?n1vPSRH@qX_) zy;mRM_d_o zI3O?}Z(?c+JeS)L2nn~K7XcOlf18ZVV^a}D4y?zgx2Nrv>BeT%Rv0ZivaH;USjT3` z3T1V+Zu6Qq8(>=A&2q)#lDIqWewVe+a<*3JIO#|QTSg~Vs8o$j^Qq2`yX)_+ZjgNf z28D}g{Ftv9Mq?`_VdmFKlQjG+wON4Zi=_f6jU{3Juscv_4v55h%-hz4(-HNqt=pS5sT`JtLS9T7aT9o4qlZ}J+%mUz1Jc-t_m-r}^y7lF%Yh!aS$Zwv6mjv74m zz?42(B;$ajK&qFUfBbVK4LS^gPOL;E!^S>;=;r0mr@M^l^hmfav!SMN zEs)W}6+0?@e^=M@w(S6>+|Y$vf8m`BFv#jU>yjhxk5Gw8xD>MC_gCl$tM9I^dHx&# zDXoNzRTuUj{&pq)JX@JW!hy?1of{A?5Wl9)vkfz$6fy{G{e?fU*DHwZR81oL+>DS< zpQr?D&xtN!9H;I~^jXQ30rX4GB^P||ib93QDCnsde}cqM2k|}03zA;?z}B(Q0HWi> zNgkR`q78G~J^Fuj+WX8{=J|#xyQGPz3r^H_nU!~^FoRpCGHkt5a3+7awH;4v+qP}n zwvCDT#I|kQoFxC)P9}I_Pi*7M-tTv~kGiV+tgEYjcdvV`bq!5oc1W$U@F2Nox(jN` zyEMUY7>z-67Tl_Y$RxJvnC*G|Yde5o>WPFjRewpsiuvoLDS)yB-(~1Q4C1wCk(KHx z7~R5v1?aE8XSXWQ|CP$>^3pbLNg$pARI=6!G2lt8t7Y~y{}7D05@3mI8KX+$pd0LZEC_HxH~Fs#2N~~b z3+8}wJ!vtym3ZC?|n`RZ071!L6;6hDtls4vf=(tgi?hH`h`;oF+($KOJ4m zA^!hlAtBNh{)jN>&gQJq6~FfFG4wsGBZkGR-MVhWS~!z#;cc!=Hg&u?u3_&LN)U=H z0PYyXl8)^WzPXg~n%M#)#s+U`ec|8{Aru|CHxB31FYxkjexci>KIdj@Lq3JiyOrpo zB^Ea1pU6(Rz|d;jIn1^}$=T8rtKVZuN*84^D$f0bM@5)_b&eCq+Noy@=oK!J2nuc1 
z%kU-ybp_BnD}kOGF0;Vyi;FYf$|@=m5LF3FVT={JzAa`cFC^;}PZXhk9bFWK?dPbPX+HdxcaK5x5R}VxmYXr)B;~ zV~^}!T+0WOP8$PVPI`j7Ox&QMo-o|jRO6BN=lqw2)YTr696TOtwT7#m8*jQMlHgYg ziFXYp+&;-a9$!HZ3elWlGB|ge$x3HmT2Kvv!~@ZG$IriW74`i?$$!V4jXp$`!0weA zkpc4M4I|nfe?XQq$n)r=K%~r#STGF1zWc-ukUl!bjtl?rS$Oh1Wqs^QX|TckJ&M8U zN{7YerK5^6Jqt`Gs{abL*%rMNy?Sd%)0~rGL<=k{S`W7E%yQr@YqUX1ov~MeRj%fP zwZ$z&XI`^CUFJg4rZSJn?P%7KRNg4p6#}#csI=>Dwa-;0(H80R`oIIo@N3kEdHr#e z(OU-=|KKE)X^8eGkD*fKMgfork%IRnJwYO&ox1gFVxAOV#JEzRgA0p$in3S&=@Qw7P64ET6H-LfIEa{J*qV^VOw z9BX@u47VmW(uJJ2?2LC-@tUx+k>=xe=xXC^Ut-DG41M$kOo&9iu)I+uWTsKQaVg{r zRqV=((IB8rAmbhC(vy`8rKb1IkI#}t3#Lg(0LBT8))=i*L?(7?j9f@kqY9}&XrJF0 z*H~3&@|QdTNO`^PZd9n5Jfiy@2qM^u8tIkTNDj4298ST_;D#J>A3PQ@`FdBru^dyD zzTuBz7z?SMTY#1&-6>~ti@br3C@dlU@9GJWtI$_Qn*6n(^EjT7N3QfCg6xRyVp$YIAoS?Jeh$%O) z_*?Jcvfa1u<@2uM+K`5AY(_(oJH@$mU z;Ny)?LG$LWA{5oc?QT7|9_?S!%EZV-KR(l*cQS4AKmNlb12uZDe8>JjTim!}Uu=?Y zv%gRayc0nf(1xu!13v)>Z9+sYOR3kKVD<%Mz(=(&%**W04UbD8m)dV4Mlpo!CoQ z{BV+F;f0@B*qvjK+1TyDOoz?0$mo{DL5nF)LTQ*~P+g$Jl^>F?Wzjlym4($YAkoT&qtyv#t>kkyHSXgrt&|UcOLiWoIBvLK~>4G(1VfGN;9kmJr+*_9Xpi=Qwv*tSM#a50r`3 zu7lSsP9 zw9H4zB92_-frjV!*ddXiq~o(y&$WAKupGb&I;QCI@@~N%ewQmPNKtYN&wf95>Q)B~ zL?(5Bjv|DTr>>5NvQvSeG}O> z&-%WQa~H@YG7V6d!Zp5vo{d)I_=B&>paQbRUU(IeQrmc;4d6-NaMjFLN@|}GyBWK; zPs|Zt+73;UYqF6dAq6nu;%(10(?R{F7yI1xit3%mDD*+r(?7S`Z!4YGMfN?I(|w2U zke~J8DrJd|1)*EuQ5_*ufn>0r``Xf-6s}DEP~bG!&c^L(@{d41y@3}}FrvzL7r+7( zQFJ$*1G(jFjWb(q-ad`07{&0@#jSEP1Pyc*QM5>|GYR2Q!YMaecU^@q?b#m(=pyMp zv^^4i?SPhtU(DVw=5`#@2$oHNmf>&9#?PiP-LrM- zR{;&XEmZ+QaU^Y-{G_{0vNFDw|NiCGuhlY=|=Nfl{#W#B}G`c zI*RLmE=~HDpvkZZ@23}BgrjgToNc2LXjDW+M%CBIE)hT*hg0RrQRBb^65v5YL*-4x zxuKBZbqPy4f9s3F!DHYMliNaLwD5%gd>+vpm1!=#5DxTG^MCATXhK^Tg8)|ux?EK5 zzx21Ah=twiTho;%YbURK^c?I(e{cRO6}~S)=k(Jg7TG|so{hrvsi;C=5y8wE_Cga$ z8i08xa0rBGo?7PKmU5vc^Q3}u(SQNhzt(SV`e`!A+~`TptS=p7myl~$Lg zoVh0z=P>pMNK7d`{>Fnx@IhRnHpNQ0MZ9OjDW!C0fAFvY=$Kc?(6pX&ZLS zR6(KLPL~ksnVm2vp42+GSnXguLyAXi}0e6=8TQ6BvXg?Fx_&K#jG)eF;l} ziPbz97X~H`96-Y6KkPs3JDtlQP28IuGUw|4+me(fj5Z-MTSU|@K=>`ZsP_4YUh;Xe zvu^F*8EqWOHk0aB91f!ZMWkd<0vfKy;8eVrT^SG^Rc)Pc|9x^lXU1K4wF@A@`ZXq; zoj2M-`x|y^fhBK+#tblKACp&PCYU?zZD}>RLz%^fOfussJ47*zm1NlFK!8T0Chpo+ z@*lxPs1j}d9t^$YEcm|?xVm3IO~bZaXB>mum0pn*)nM3RoR>7!g?t@;Y^A%l>?tr~ zdW-`JBxT$~MfyUpRC5pyf4p4WQWfK}lr9%0b~8p?6y5e?KtTZyp3ynxdX!v9iN{y0 zC~>^A^&n?#uh=-UX3!x_{f6Pb9gX)Y2+)BRFcIOP7fZOaBXdjuFG9P%|N2hZ?V+cR z@@h!UngmVgQE~*pBnISdX=(EPa6|X(agB)bku$4|-=NeQUz65m;Ubx z@0V&mzY>kuRb>YCi$H`E6P~sXi5FiNt;eI0@<98qO%K2v2p9r8dFBHcVN{4u+x+y* zV_J}9CsXwXqKa_`om|J|RW%4U6$6lI;k60GWXibk=3t!(urj&p1?!>|1|Lz9svI`t zgq`gFFf7y0LwJ^tFOU_~yfN;CCukKKpzz|xg?9!FpKHL{CmW=i!Z3I0!V>b7RK*se zg6n=nF#%|NNoLv8dUg?$yNYppsX@-h7N)QbolUlNJ-sTOOuQr^(Mzcz%~`U1_*kCbJFOGgp1|+{R&--v1%!v9kPci_-t^|IwC70CJ-D%{9y~Ad(__sxtUk2U^aPTC2DP6DAib z^WVjSBpgoO+4uKdCoPNWv{#(|MwL)?I(Zh1rtE-GsAshH`tlywl|f5qc8oG7UC>37 zm1yqne|ZR_#Y}a_Sr0&X&pZn2`%1CdJW1q8KWtk&PRXL6j?q58Z&Y=UhF4!?59ORru@$M(H5q&=|$9saP0H(a9c!^Vxpw zNK*Zu922%_*d?bQ7gaFoh+BiDWN7fX`*}fCF`FpFaP`hTJDr%-Z}QL?iWp$5b-8Hs zvPpCP*MWK3N|L^dMCM@y{xr138OB8ONpjjJ(?s&OXfydRc@8o%i#NP z*yzJqAi{PU+3Z`e8YasN(Ak2q?~o>s{zA+9kQkc6NV@LTVIFGf|C<<{?HEUq`w0FBRK&eJkxDpRHh=O4^s+9T}?WtH5YnT=@o_o!BQ6wzibRhF@7jZJ}JCD z74r>0SpaMHaK5gNzb`$ z3mfv`OT}2j9MJhy2vb2GB8)%8%k(cUMb~L(u!beX#?#xq1*Y7@${Bps)s)W2YDvrh zifX&|d8L}+92R}U9zDjvK+|D?rkV;?%7+)!)ko94%}#80V9D;{&(#b|c~yc#f2_ia zfWYl-fjBJS0SB^gszO)UN3`#H9L{G7>E!EdI^C(7olk6~u(}HvC+VnQU zNg-zggF?;DB)liAb*Vyt7#9NaB_i{>T(qu;KU4znqdjL8Y>FHnY6mmeFhP!D#(I8- zA)Fw({~HmAf6mlg)?YEX;b^SK!3z{cp7dS3zaK*)I+jOHWwz@!a9`MZUjq0Z37za)cvQDI%z6rKmu)(*pNL!wxN|k$qUw#6H^BIRr#(Q 
zKYmA^uFTtdQj|Euqcgw#ukqheT66a7%WO;y<<55R`^~3QFH%sb-owBQ52m1B$r!!9 zcrKa6U(x5i{SK39DyemjO-rXK=^4uOvG#oLPur>ndz%=T;37!ieCLXNnFqf z%cvp;YgLiixhpMAo*^uyX<)PrOBcqlDtyr*{x+N`&p2#FMy_@&l;MA4T;SY(vaGcm z1}MQSeO8wro4sUZb6*w^P#~S*sNfwa1q&b>M$bp(3Ml=%$u7!ln}ZC6w{}Z`2fBU$ z#og(9dHO%ho#yHTs2@HWJ0%wl7HMYT&|pmWRLs_pBtjIn4{&IrBqtORI*%=FBCd_V+07Mth^n!Wi!^`GyG`itf#M0L0 z)irpy7%ZFrFcE!QkzW5Ok#NpH;qQ~MlP{Mwgy7>_O09yTMUGABb#ml`3z3{te-Ebw zidM%}RZ*uPghxxYp5_Ohuqp#@xT7AYXeUc1Nwn-O7xMVdx{6rTpuHKNUHKP);C>F1 z6Zk4&CeZrPX0WY%K^_uCcQ?tC3%cU%xd%+I3xRPT#1sTtK=!kgdhc((N&xDGv$a<$ z7xJG;uAKeRuKAiJmD_1f4N8clq3`I7d)Nebb(vOysCqOOg-B>Z6R9(LRh0Hj4m2;M z#;C8tI7Vg?33yfZkEyI&D$rp-(GnkneWqy4eQw0Lk$ijf5# z<}l*?^lAEsIlXqsH&?t>mp3Ssn(j;JHHv%Ko;z;`hYErondBPA31V+q+B9;dtS4P@ zq;LFktS7ISyNXQEAuv=Bzhk20bt2JUbOnuTVL?2mJ~&|>Hu8{g>z#fiDo3P%GZcXt z&o2we;4O(mN)Tr7xBCBzak8t1P1-*n%>FIH>fkO`-$?1dp7i}!KF2;jt|w$*8V9W) z_Lb^19pDgZ<5&UE92r{l-ZT1Ke<0{zU#?vMF{yMSex0dMO__YW-B) zNub+X5ebf1F*+p6=t{T(i-wb0%{0UrWvwu+j3>O{IZ#4e`0?4#2F18youN>_7zF+L3 z@o3vf;a{rY=f!bE+oEXJxPa|2sB9zhPw%MT>wCY}G!Vy{MP@IUcPRh&AhpZ&W+AuU7Xz6Tl-FF(Y;c-j4I;L zbFiACuDsVV_+5G_3yPUs34F`6NJX_V=a@NaX^!IhGZ~DhCz-(_0Ez}Mi&kExKhv4a zl^n%~v;pIA{Fj5bC{-$>YwD$!^)xhwgIkRgL&}>e)zYHiqQRIhk>TPHrFsT>U~G55 z#5;l(8OZC~!bs-T7rCLZ0`=~3gPr&%yY-T_M46?~Bs@t9nnNNl$ywJ5B+{a|Ls^b_ znA-pDg)Q5<|7lya7j7BQTM2J$japXIudIe{JE1gY8HxTULL1J`e<{CUSS=QPMaOhV z8ck&Ppr9cDu&n{-<(JvrQn7WJKp>65&Rv}_j!o*JtIU;fO=It{qpIUK+hJi?d%k+a z`iJoZO_Sr1xBUfUeQEeQnBmo=M?}(G#4RCYoGt?)%ktr z)fu?Y<{ph?K}F>Fb3dkdbm(#}CIUz`^m9#!0A39fYe!WaiajwCRhzsZ=f@)Pz~=7{ z-H~JjJF_qo0XV?~B9QWM27Vn6-KVUDAaxZ+x+_gZ0Kd72nhA)b2?IONAjJOzCp-t~ zM}rW+LT2GE|HDxErNx5~37;kbe?Sm;AT^QyyiR*PD(ZY9*Xz$s=WK+@nB(3j+i=g6 z8b#V4Zzi%MKp?4Eo6sGw0k(-cIrFbQaSzbgw*jWf4RFN41j7sBBvIaf3O;RM`0us{ zd&HL+nqMd=Awrlr@N6C-=mlMNYy%20jKniTLh$#BTiayI;>se8sSiC&`|gE~f>kt^ zwR1kkDwi+(+L6%iesfc}2u?hKxW5h$54siv>ukVKEMRq=zP}63Y*l+)8mM6!ZNX!NzB4#v zig~V&di>t4)dloU@_L z!tVFltG}yMs)4!q0`=(J+v>a)m`*MKYc(17dSP8a@T_e$CSV?bxHvVVz@}IMVZEbH zBNB~@;ktG)+s_q~!&)cE9RF<5@{JmvNOL2u$dkX%t4d&1C*xK51?TMfU)%q27O*jA z4E&%0{ud08!6yPr1|X$26$OI@8C4DNe>S|4B$Qdu%whjag&#CxdhMML)C-I$Ifs^F zif^U z8OwA}T6Z!c zqwb`rn_!v^txSS}*RZ=~KQtfg4APZEE8dNo&YBgSW#2bG$?F{gi}-*Urm%~1{Duvc z!Pp7gR0$^E1iq8?VgN*X~2|bpKz3?wWWG+@HRjdq+kpS~Of%4lVYs^SaE~=01*^b)Q_%&5B4xw5LG!dliDYk3ffg;ly(0>n7+A#Yv_+ zh_~=uoLk`j4WMxh#v&w1j-fG%`|VlEAB;-h&YEd^HbUgX#sJ{RAZ=aFP5F?)D54=A z_;M*w2-G`Bh)`v)-2^G&F@1IALpI5&Nw$1oh5|u<+Snx074pW4?WcuDG916IYSqBu z&)l*|4w$l?kx9tI77pjMyi@XaN;UEXa1JR#&iL!L0;xSeSeVMcQqc(Fn6zGtvkt?) 
zS7fr|DSACJU;wj{5HXsO=J*SmuY+JG2ArKY$hRos5G*Xpu;qH_@}+B+(3%YAr{7}p zF~UAu5xVyzDULYo_sjF-)*l0rZM6Yz{TH<1X5HGIpd?>|(*y)brVwq~8Gg!ziK{3Y zd}g7!l`G0cKWSM7)11&yIl*HSixkH0MtWEH;{}Ib5EQt~02U9;hW|_z-xPFsk%FAn0 zxuq$kAAh=jx*^U1Pbo&QgL5wO!OpOWQrt0!u*)$Gn^wgU>rBkXmc`vxCVmfBt*-}F zws)Gv6+r(Rx@UY^gYW&XAac9j2HFHrJ+rcN)@ep~RO<*;)7?-nXy6(F_rEoAGT((U zlkk{3-u`BLDq8212idR~jlZ?kr0AJR3dIA3h77T1DG+Y?T`6oe4A|@|w63du*t}m9 zHIp1QI^C6Q;M^+lUalvb@2nu)SnqZ+RSV^HH%iXSuCc>3*8(gwCSRx{ zsFj0%+qKynjxwc7jLXjPF!K|UFvnScL1?XA6Y=x@nlx6otlRF{1JOU?nfv&|dqCzt zEM1<*E@eD^py~ab4b}7xcl#UHui`RuO)}J_D1)So6wAq`T1^v$9u}doQwq|pjqftF1Q^Pu6v_jzVdi=ch@6zNX$k=w zbxd2A&z4)SVW*h*Mc8BOmk5=O?IX$3t zC2Hp|_9eTtrL|v@!Q*X%Hiuxgh)p`2ae!Rr(zG>IjyMB``P^ZW zee954HBTn0>w(4OL~}Mh-~CHl0r+QUmX2H%bvbP^({4vj@)9UD?!qL`#TM2 zOKs_Fq!S4=)O2>PHk=v&iG~m?mac368V(RDl+{D@F=JBKq@x=Fn6Tm)fF#NiI?&Z1&d-)d-3#fRV^S(?% z=tpK|OiV|55dAMxl-bW;=jlgf}Ndcd>XB!?lBX3}d_ykvl#^Z=S zh`J)EnC{&sjqxc%&vg86&5~4myAumpyZNrm%aWlXm^`dv)%V4mhf4IG;iOncDXr2$fM13YARrGpiJ1!cq4fm0@x%BWi`*eluB)2wzTWnafDkeSqk_@f=m z1VeAW{v&p5MML77h;*n8{6#Wb>9qO&^e{zLWQpkr0qiKB@fQl2KQL4l_gU$_dsVs) z@y~|g*SoSS*z+%0_fdj{UUh$QHjN%KL$z|`KVyLzmcEc*1~8AeFtD(Oa=C6<#uCu> zwZO_Q0qdK7*{|}V)ykWuO|mFK88AOr7*%0+_GPm9NfWC_f%+>nX;jPS<1@MXi1v1xCR zz_>>Q{@D%|s<>nRP@tlDBKSwCOXrMxO2v-VaIaZ5THHV8Y5I6>I|v~>x(nT7{2o1a zIu6}F^9|C_mW2C%jb0r8ue6wh<$tFyz$LDoE3osn2N32{aEb>v5Ch_U=}zgGGVRWv zVOXcw;M!$r9o6ndmV_w7@$~t2Wh$atN<5~>cG-Hsgi07Q($@r+IOk~T_jDZk{(V0( z5wkit8)|HWKefJ(o1IJ1l%J)|R&{CLl>Ldm;@tvlo%yGqO+Kk8ahP+-I~G0y=!=Z* zSt`H+_}ua;KA}*CQ;ZIJATH(f+&}La1blvdTms>O37wo64w}L19YK5I?s)BLHk@=) zTT6Cut;g#*g_Eu=v>avJVTHbj3v%;j+agOwc%As=!9#iRr)>G8NG)AW(B)9!5N29A zTR$B%u@oTRtCk7pi&fWZXKj81Ru5Nu;)uv^jqmf6ePYA|Z=!)SpIT$e>!;n-HZ#q$ zE$!4!)}0d_5(vGcNfToa{`4Chfih@$TEmZ&bBX%ajy27+ng~PgR1KI^M2ZmfCu~0k z9TVNktUa0*%l6Iuj|aAbI%7mmW8uMzl90{W)KqyG^({*e?>lgv7}OR(YMCUi6}{{b zENCYLTGaz>eGBmMb!1Ae5oW@BIe(J8TPi<=TFN6WdiyrBq1r(+6@fi0V*1)~U@;ra zg3+w5SYw<1e7}7pNfBgL)4GBcc)T4css1n2Wp_w#p#g%GbhBCf5o9Oa*%=dVNkspb z^02TJSHcBEVxYo0;qeuq<39@QEq{=V$ShV{q#W-04(_O|_>mM0D=%S}Cdg}f^@`5> z=Y{|;BGX}JGM^6pKo4~1=9@$xB36^AWpFBesQIqHftSc|t@cduJIMb<2G~D>Xlb;; zTohBRbp!-s1j12_snbXs<}reg3-gu#df~KIgGnjxh2ae;-%(2#4ot!$R4)`1`R7G8J4oSO`Y zZ6>(5{>M~`3qzA24c^@~AjZ=mBXr42CsaqsR(Z0oBaomqcP%I;fG3TcU!3E=>K&hA zM5@?#!1`6Za5H}Z(4VrHkOfG_{&`WN(e;;IxC7`fyv5M^?Dv>35KV-JnaTQ7K%F%o}Q-VbeL(`@WOu0l}5l!~6cL-Tu3LbUt-~H|V+=d%-d^ae1zebo-ThyNn zR7N_Bn}!CzVkraGIT+fyebz*OaPHae8fwTvh?p@*sYAGbV8sT&N6FP9F+W%y8^Zzh zkNql0HEWY900H{el>$8^y@6WP?~0#C&9W|!<|B~jCUF_0wnCo1?^w#}xG|r`{L!bm z6Wjv)Sfi(q2b2h*?gBDS4K>=^jr2zX(enea6&4@6-hrE= z0!%q);l+r&;F0|5XS4$lr?1St0k5A#B2u`1A39QiF}H^VrdqPpSE{tuf`HFc{C_$! zOonpq@-hG7_^NYFs8qdxemBs zCki4bZRfxFb6i^Mbji_jRyaPV){w^&rT3gOq!y zPO#DdMA(dQWinMslVgUCr zA(SK+zV$FYMx#%3NAhJ5)xoJ`{Fh`bJCZVriJtRKx_Z!x-Wi_~=$h~X z)xi=1^crpXz2Xx4+SG#UOin3|uQ-~fD3!njfM@fGnrQdM4c74?Ta+Q^s7Nx<@mqUc z54pe^%`Vno48eQ~y-i$wQ0f-;J9d6wpi}YVAD}EYZk^TY`3(t2g5_Y~!4Kyhbs}YZ zJ_YH(9M`rmrgp;bSU#}@MP4p4Db7KHQ&edUq>IscEzQzFM`Fq-lk zFj_W$e_$B9&QU!t+Pc=Z3?j0n7Sq|LjU@tcy99oM$x|>!DZyDG2dm9mALGp4uXo%* zh|;^c5Z>2Rup2>m6>i_J^d!^Y!`Gh=9a?{AI zI!=>L>E9>d^9%R724k4((O4KId7T6skSH}`ZId${W;3?FOnETN|7S^NB*;eqGY2aU z>KC1CBK#Kk zJ}qFxFWe*ixUDxihe}o3q2X0L*(D}@=)h1cblGs|50$L*0sZjt=9L8zy2 z+#dQjsDOZ0KS+F0NrrqSL>xCETOJ{2cDa&Ax?G>bN*fEeuzzc%%(u~#xQuEx3^HQ! 
z-}X#b68Rg)FB4_@AyKVZ4f$K~8^E@B>%MSz%vA6bz8k-ZNiw{*A^^aAB^STH*^L+76Ewj+1Wk%^`#LO&OKA&MWCsedSa$|?k% zN)<@YI1w2N-L!R{AFxWC1qgNs+qqQn5RP)$QCVZvqRN+RO5`#56YU@nt62vh3MbRr z_peTo>Ej4Se}Rf$$#!{`-`{9dDC~--snM;AQcvU7@~Lc=gBE5%$TA-5iiM_IUZN z$zS!~rb_b6cU>aP>lqy=IuaO&R$qaRN8L_)>f{jNmysXXTU`F4B#OfD{!EL2up=rP z2R&ijjEUltzimL$FsYZ)-n!&|YW9c%w>wj9uRh$SIkR@VW@^0W;D~9zovJ~Nh*`nu zoq0H~cpTd!1J$(eMF=IUXI8F!+B45e`?7;x$EoZYIu+Ct$0?mQM%PSSbRSf);4=7i)xiYe* z6vC6JW9r1K$uWShk6R7U@@hP)uWHII$H}szmK()O8|UTEQzMPEz9mN{d#dRQ2E6mfxZbOooIE6e zX*<1+F~VM6@Qy`mmD<0a%my7zeJYzz_38HYA%P6)r}FB}o0Kc7H0 zFpS5=2}o0lE~dY~O24lJs7c{zLN_q|mptI(PrN_a*810g$fBOJlg`KO>#9e<+o4}__Sv>mZ|5Dq%w!w8dQ5NMkKcygl-t%7=ybr7dI zY^BG*7kY7eP$vySnnssI&Dm0EeqEGI)Y+Qpj=r93GeuPvGQTRJ(2?-@J`>9>jqO;V`!?Q30c8x*wOV(-Lq&(v88_t9Y?L|O$QGedlgqy3mL zSo7lh-ru(0r02svWAY*d5KX5`AQe%`)5+z_j{St?qw;RjvSeM;EjK#)k3RN7anoHr z(p^X0ICZh$sfe*6$+z1O439Hxqp+fo&&Aev#h`hjW#IrV>+0M>qJhp36MMYV>d3$q zkB%x#jRZow1O)*r$?bGnJ$7l}Fh*SQdCeQDnF_-Cub7Cx zZyzhtl+#{ZnnJ{vQs~VidM$bm>(@o{t!-%R9Y^rYefpHhNICv81*t3jBC;@cb+df) z@)o(bhm`>I$MfQ`p9OriIVBYq0*lAJ?dQ*pE4^FxzOE@K)Mu*VYic3^BK-BZJF4ZdgHLD@mP!x>-Q== z0x)qx5iZ!`SW8%9kS*kDdSBM=tS_pDX)Bzo-vnqfyo6p7jC$C-`1AC?%!LI~BtTRm zLZbQ^St&a#k?Yzs1-zz7$*p~m?8_hNxLK)m+*H6Hu*PsM%!~*TB|X~6Qrks^zU9`q zvSVy0m}%Zb!M2k&g}dt;Tf`#^H$nJzMR$e$ zz4h7=+ul^&5fy?vL?r_X+hX+imlvG&8YkcTYI~AAuIKe5ux4`A15!J&^yhyM1U~cjVGr2di<37V=$^w-qS^#<>ZTrP0E?&vZS%9Ll^eBLwcds^PSAbI@oxG zw5;jjoZ{Y*VhCqE0uda2EI?K{^3;B3aV)1uiZG9XNv?@}1Q0kl&Z zSnR#u5!`qy2tXK@(}^rpf%>1W z%_`D)NlnP-m*e=s4~xRzK}5zx8G#99N$qIy0(GGWg+#rsB226zALBeiE?c_sS%&b( z`pL*I#Vlyt`AWlZ%%LW@j??%Yt`%O##(N7xqCSga*s=vgRo)NbiDFBFIlC;f!q-!2 z4*FDcwOmJoZCxPC@__llxa@=4)~fDpf!=!SgIyJLOK7{8xAu$Nx$*>{AZOcT6KsjK(R+l!Ype4M_tM|tl^J}_D)UtJ<~10{Eyw7k-JE#;v(lV< zFKX!w+rwK9Vt^!6Izh_1t@2_;GILdnt2q4BGIqmGgtYVfb!3o2pg7oZ&Y!1S5R_B> znWE9JBh>FHPzKTeS~G>(o;|QTd8IXp)%WWe5%jk{ul?K;c`pL7Su^De_3@wS^3iz? 
zA~JS`mS;p(oXIvFvFy-MJbWGtEpSoD=X*)LP%QJ8l9-Z^r$r@X;nR z)6JZzO-yRcHqldI(`9B+MjueAts2o=W&==S|4We%Mr3azA_W;h&!ulJAOFileIlme z>}Bh!!{bxF##6Y9E<>_C7&<55uf8`;OjhN_qF@4SG=Kqe$$L=!yEdwJ7chdR-2~>v z#Lo5flWYU*nL#!NrEG_*&h1B*jVcVzyE;ZKk^MJi24Hi&a4{!%R8M#92B~bd6lz2;>EQBL zhI^Znuj^KU2k3597|A%g>?14i%C#49POOIyA`noa1>^NcQCP?QC81$Z$eVtGxexK{ za2HE02t-BnqnHRyvg?!hA@L8ob2$`r6uWTlh%)>j5J{Zk<076>M#c$=owC(Tz^fCw zIVAqf*uDx8douzB3=$`Q3kWU!C9U~d_51o+*0@D|U@7HK$~O9Ck;^pfG}u#<(goO* zO+LU;8!j{}GJ0%?rA=x#fU+m;t8-0^eEFi2Cd0Mb3Xkb-z_KjFz1qL#?62L;$hD~~ z+#rQ&vgFOq*ZsexRM=%=itbuHEtd#nP-Fg)$BPyrvP2;WR`Fj$p~N>uh`emHRu*=j z;IJYeI$OZvX4`k2fB(J#Hkk|`(tA+-tR@Ug^S6%M zAnIVruPqxaw5&X#pu!3Tl2m=n!g~paBy-m+<(kv8AmB}4T8}Gt5On>Wrw~dB05L7x zC92xuLtN*#27%XHx`_gtem@(Y@{90y25Vlr%NPx!MdbN*xEePqN;%ZiK_)MIg-PKt zjCoknA;A{Ef*LAnc}Tmx4N1>d0fq@Pmpq$UR45%Z{e7|YhWCq63N!q~Ea^E7av1Zq z=J26l_}(~TaY#TeQ>6M56eaEpAP(UM8Wz5Y#BKdG3yAgP*3|M}arPYHZ_lp}xl*={ zx2dAdcDSjwxlg++U;rN8m!Ci=D2E&7!0$13s_*7nf-YUjrhHKYQ2{ca>* z63J<6+vlqfzH`DU4>SfuKs|ztgMm{=Tbc(vVp5C$D@O6{Ji3s);a^D$KScf6kXxqxrDEg->|eUsyiCElYgeZ4E>gdkXKNEY!nv)pdaEDZ4&__S7zKR6o!y=V^h*R^CICyYr zc9~3^Cocgbqx}EadZ!@Gf~L{B?Vh%6+qN}r+nlz$pSEq=wr$&-wr!tz-|xg9C*uF^ z_SKHMsm!X(thLtM*yZzcSf>n;^0q6nOZ_TdGs8;zW-(*ziNC41&J+1m+Q9aD++Q#} z0qrpD{d(Si0q0&Y)eCc+x(X>w>kUrg_uQZ-Xjt!w{E#93aD*+Szz7#y|04WCau}+VJ9O5(gKbKMCce(L2m<`e z_W8`!_TCZcX0(P|LR~6L8HVfHAviKPJbzNLu_g%DS4(I82ERTWaRP~V+_-ycWBYqb zV;x4|XwkUQC?IU?|MhjI!kYtk1CY1L z(ETI|9fmlO{j^p3!j6ftx5aZI2!1-=4_@|C=Q|%(WjWMkd#zZd2C*5Z*>sRA z+?!9Q&+p$Ixn&ymXNR9=yEKY)+oGfjf@<`z1*Yuo&%D{Az|{lK7MeZdl4j4S70f3+ zxpPC$yqQe`Sszm)4Wr~dMY5Vb0Y90UvFXF1l<21o!3NsMLZY{d>nl4%NK*Y{?#F`lbCf8Z4f8UBs%_-9$k@mXmE%@o{H&ENO0c4usEmqm4 z%Pk0bQ#^#Uz%U=RQOrSBPTo8D8BkH*d}&I5m8kLK-LPXi!eIo7&NlFK-EgDwta|@^ zhGl`e526VofL%`uMTn2JlHLH#lbjKOf~HOL=faUh$2x%EH)ro;0)}E#4k!wUIZE z4DFiqw%jx0*Z!ARG@7Rw@0SIpx1*J#uR`_~W{9T~ymOMDWkz}girV_pBRgAyz03^v zDx=}n<&QVi^DrwT=c-5wYoAS>*Tz8UV_6PnOAGBN;TXzp-syE@0n2&krZERF3vSd# zI=hW#5HWaFa1WQomGI8D#?0LgmNv+0hAz;w?Lnbvtm31^v?3&bJBSx$z4J$$PiX6g z!dxdd3PO-Ilv%1L){2IzBlRIWTELDJp3Z0Kon_jpuk@WuROLx)g+_D?%gwLvzo9uYy#j5=k$WQS+C z_(Vj-ibJQRs&SP+4FO5k;8Y4Kamcrftd9kRcquFAOfU@y56J;G=?polLg=IBc+SAV zh&QT2(r~_g`*W26Hk_Uj0nUubq5znCXApu~LbB0!%D;I509s#NA#`2j>wiDsloCRp zttC|Lg=ugn2G`_m3<&Z?HK`X7QZmd@61pFRL4H?^+do4qGN>tw&_gRigt27)k{$~w zER~dmbowhZeDH6jr2ke54)WV9M@IdXi(dn?zz97($UMEj`N&dos_BU{a&B^i zYEn)iLdDD7ZV#sm5BaU-f3Jd@snhoByjnef&Ay2W6&UOI{`GL*0Lb%m$(`Fj79&DO zsv!ClHR2W3Xmnp7Pm3X#XjMk|VDOAwCUFaVQC4jfoXMV(va zDbNl(Zb*aeO=*CrZKWyMse%|1FwX=meYFu%I+(rw=V2H^;M;^LoNeZMjwUmT^Vd9y z4OTory2(90;X0vX;oVy!d`*mE9lKQ(Rw^pRuTA#Bl%j~pNS7(re<*M1EKp+DKQDRRAP3^0-Oh0MsoPjBN(syefd$*F}bdJ3_eyfq_wz%rm{OowZWs zDYi%32Jnf&wBuv4s}c(Fsv5UPkZLDy@2%B+v>lGgg_~XtJ@dg=FegG`^vCG4yY!r` zw%(yvId(5jPh=?LuFLpH4Yr~i7`%1xVLq)d-%AtX_MIu69H)J6~zwCf%-^hE*okt58ii+Z!fcYV&iFTtO?LKz<(s3 zpRTB`xjj96A1CAeH1Wd(hnZxIF2LrvVbnGY2>|>3*yz0ooXS$TTW!^wK%}VH;$rr$ zj|xg=6AEMzQuHMgSzPJ6H%0(b6Lel7(q=4J@lCT;V9>DDzkDOcG*} zC8mEKrg4!b+pgU{Lvi>eBv^1?<=!~{BF4iD1Yiks#0*sD(tNo|tFS`8hX-2&5koM5 zP0^&uHBme{#AKQ>M*srm*%dxL<$W==ip62VW(F)HKLU%HnIeh7qsA-4SavYmY&tg4 zJX%H&@LSO27OwI{QeoT9T!{v_XEQutty9K|4g^Zwz%VQ&ZP`>ma>$h+3T zh#;erbr^SL7muZ@blXhAsvO&K$wUQCzVxW@3)0SirPyw)yaDL6X`5pPA7=C}tTKBniIbj&GaM8Z# z8k$7!ib?MrL@4-U2oMQU@eu`p?~LC<-9ae94DFl#B&QhTWp$NhLuH5Vybb&4HOVXh zsZ?3I?wUL~!UOJMbpuLyGNq!D_WR6GTw?FA#E8T=agOPHc!ccdp!bJ3?haE(We8Q= z;(PeN-nK(q|D2!>6GF0V2it`;A;diU7Dam(4RcT(=&c-?s+qa;7!_s3F%4B(RN(5} zM^!B_P9IUye0^%f)IlF8-uOZ8TcYvYHA&AMTW91;Z~#WN>|vZqcKD~K>|=<{LLnn& z)=jV+I48XWghoQ3&v=t-xWX|xbWd5nGibIZKC(g%0tHWw*~LwI~8L 
z4%RK!Un>ce?n*%5&sTQ4+zyPC{zsO+iRkc_g9u9KFeo&h5%rLGx7upH15&`faf_0_ zLWS(j&p0@D7zuam6OlOhJqK+P^;4+U`hBFO1po)id{7Y*b!NvsXdval=~DwSrmdFP z&3J#@kIMnanH7}O-sc=a)LyR}Um%wUY3Rg$T!JY)kLaUJ^POcCM)j;fdK^$j(R+K1 z=B)F(oW66>2T;*by(kpLXuU7kwN0J)rE{n}hV*iT@rwoOHmb>TGAB&x#cVs7G~b5;{&2hRn^PZqaS}|Xc*Q$xm%=G$KLL-b1K*^x4uVMF8^N& z`=TAbM2!lnJtgzGe`2={4DO-#e?Qmt$9yGV44U?ayV;h|FYjEv<32l=*X*qd-z`HQ zJ$#an(%zUYeA((9Qi;=i>Xh1^4<1HE{sQEDPt%gu$0?(5TBS8eHc0?(7kmDd@MLETTqOGXYcA4Oj#o$LxQ;liB--O zVZh$EB`CWaq_Bn?wBr&Uk}HFupr{Kgxp!2(T=nocgBE;*0MFCwW8@#udYR=&;|H|E z3&;wkA8?!e49)>8Dv*#tQ8ieXQ4`zGRI?2$OkO5!h9nIB3ij&~4|o?%g{X`po0bH( zAQy(wB)K}^Hee<(lni={R0;}(NVgIu#bt#81T+PoIRRddFf#EVQ`u~l+`BIAVEu*R z%S%6t%ybo#5~zD1d9B|@V&vt8$^cPfZ?t)!i5Y-<;A|=d_2)Ajj+K15knU7kHlU{zslbfBau4@;)ld~iBs)tC5^Uv>P8YOKkq`(b|NkFa6odBQW zo!ias*T)gU4dih4OmEL${fft`s0fz$u^;)9kq(gxhc{pFTA^nmTI;Ua& zXasiv#RijODq;4z_;+|ZYhxk{2iYZT`716>eCepn&)DI8gyj-_VR#YRZrBDy9;~llY;s%I*XgS>FW>yk&HC1R(SOsLyBuVGvZKQ3 z%)ZR_OscUWjy6%Y*ZD?Q!2}*xY+zVx5g4|O0snZz8+}B#pV((KmkVqN5AW1T+2U%P zc*$Tfl^^7Pfuk6;wE?g&!dX$2qIHX46P@q}fO(&pYCxaI_VXA*lrfiY-Sm%pFS|)+ z=Hm0@`KFr|x2e7*Z|N_cp06P4_(ffO*6xOdWwmY`I8U*~zaMtq%T8Q4b~n^%V}=Rl z=kUG@_ge7^52)IrQcN5&v}?J%Ic#J8)lzxr2}0ycY3O!d4*(Yv>KNG(c-gS4w;Jseq1lx4#EsYe^1+r5Ud(m;QmVqjA)P zeE?LMVVw}{0?f}-*sOPa1l{iT?)P1^eOY3Bhj^x8)1r2hjLLuHk@urk(v3dyHi$&_ zNPOH=;Hd(vR>m!f+#25k>v7U}(-o)#J2Gh3M&5;RqZSH*qc0*@=Xt)-w(4^Cvq$iaL}Rsq6~weWb0;$JMho1bB1#EO95&* zBDkNb4@tBNtE2>4lmtu&1+y*dA^ChX^Kdp13)WiDDMLF+#C)T0#MCyC8w}Xp96rs7S1Yoh)^bM zo%{0sL@^=mRi-)lJ_{w8l!{1b((r~LG9^!`RErbNV2IL$yiC^4AZ0@xQpex=u~CDm zkn!@E!_oLcR_XEFRuL8-aDNVq!T{nlKIt3|Bdc7()nF`f3Ev$cx$h9n%|Q9>F?mg< zW-33=W_4ZkdqriK8UTY~W4)Rt{i%RV&R+jH5*F`a)u<=uh1%L(EwNz4HJ7|B5g zjtv|!S9^Mdjg2TA+pQ4L7Bk9KLa~GW`XB9skEg%R{)#>RfpCH9u9r2XR;kc;{8oWi zbBHc+^o>=SSw6B<@Ykt$n$HZ7&_=T>z02XS+E~Q8>XgzcrAABHxbK-Df`ZZ7MZM~v z>i)2WDiVM<*VT_WFjnpW8Z`Xpc6ganu5OawcO!cm{K8^(%yY}>(sU`F1XB7S)-o12 z>)C;s=Foqgf@QVWu?-dGpukKPv}&M2?Xw^rW5YfT$ni!~^@YZUu#>PTEU{nilk#f{ z(o%pc5eTxK4#K_PI-g-1t3+g`#T`wl^#n<8Ch6|7mY#Z1;$K(7nVABVWk&Weq$7&( zTe>}2_7bi8T%h#DAOy*di(wjB<4uYE&W{1<-~ip}B!m+u>7=p;k{6Nfd$jYePPn_7 zX^p|NoxeB&LouZj*nv!jFDAFekqKLcOFu{iXiH5%6h2MK zztB6+zx2iru|M{_b7lghf!^nL;^}zuy^oFiNV`NJeg{JnNh0|Nl~c^(4wMZqDhjeD z_a8C3Rv6EAAuxZ~jf5l?O5@0f%7olq31HQh~3&65w7Xw+qE;F@m}z)y|m64d;xoSfKD zqkJD1_3unme1ig5R|}{n1Mo|uHY&*%kgDuL*S$_+u`Pm$!9lRF_bjDj9P`1ekM&fT*wjn+E;D+XJ*Ph8q9BJSqEjyH5za$GDW4V%o70=sPfPSVOBB4 z`Fi{RnOAklBUp%UWs^rJw>3)8Z1j z!66j~v~_SyQPsFBnhwgm<|%(tPYZk6EyLtgN`YmxBvi*^jm4tJo8Ji!X zfCn6g6W2I>-zO?CyCR~=fbv>VWWWqmOVn^Go`6|(4#&>(rey^u(ZL8Ld!*xsN-m?o zZc?YG`?P(-gF@!(5++>f{1kg_gQv2`vgHlT$HzS3M~%9Wdh3Goq?|F#Ua)5n<3r0J z-v0GP(z$Q8aP98~{YRcMn_+LpPYNGxK*WBF6z(!?chH*nME^SFV+Vp>VZ8f= zCyZQAZA81ZdL^OKC9&|7kuff#T`W-NgJhexyMMU&djOrgTEV`GjUF$2Dx{?Wo8?mT3Y9cwUv?@at@m5q zq$FqDs{~}Kdb53RrTrx+%8a8*4q0j z*Kub@-ycL4thitfua>^@t%pQaHTo6EDwCz)J_Gh_FP#V#!A<6uN9zU;>qaQd zM8e8;=c~LqT5;H4o2mV`;9{qMOPMFK zE;0j$|R40@@ViYXlDIkUt{DqsJYsBMO5(EI@IPEdy_=el}a*#q{HRaR7TK$Wa6>5p9KF0 zf-e8f)LI`3yaf#So4ipPn^Yv#J6)^qBXEMsoc)_ZAJJmQtJ3G|@e3pDtok{$vA6qQ z%^$>h=ZNc83ZATNeJ-_p~%I#{vA+VwbefD;!i~}6N48a7k1&3eCHJuovQ*}#a z`a084h_hV)!rDiNgkM-QmnQp5E_IE+pT}*5N(VwmqJM{fmy2arFyOE=9@{%_nt?s| z#5;2Xs=Xz&;?9cKG3lT{d6?+!0K~#!`&b-LND=Qe0j{<;M|yB%0&S;5?5~EKtO$o4 zva{}AyM1^-ma<&uCW9gX)3v>_)o#l#D1BEP`~)E1HVNh#ZA$o^HjRorc2c6d`l0Zg zwNyV6IgPt_9wTRq#HLPwy|`FAv9*@X5v^Olu^EV7s215RdAA)WgJKBs5djRVWM%wn z*-JwXjXOr|bMANPls6w)@IZ5h_J8q0QJT?li-x543lo(6h3)xkPPF1gaKl`8au@1dzQ?X5EK#sY(ZQVeX) z5FH0-RYL2q8#7m%b`P6Wh*iL)n17O@VLAszW}8t?wYo4I$$R!nI|cer$5h=-AJvR) z8j+_!NqS})XNF_p0Nfxm%u}?&7Ptay+;7PL!rKS>9S31vnW^&Gm$-F;VulT+9wh2 
zy&BA-z)&)*-?=krG$owmYU%v$RenKf@EhpY>LT7%n1?CpwL=jz6AFE=vw}fQ9~^Il zfue7XexvaVhUBMbVtPW8NqbV{P&_m;zWTZrKDIt5m70P_9NK+tqoY#AwxI?Ii@WE2|9Tc%h^0&Wu3X^i>9+ZTp6p2TYP-(-`_umY3Zb@xA=uxUQ3+5xUL%CVcYN= z(x_IV7Z$>~66cS_$X89`75Q8A!4eIyo(N2pi7OqJ3XFvO*ml8_gDBsWz~V5qs|=kp zSo(Gh7e-5xQH%h=BqvNLoMa#f+wq5kfXMnCQzJZ2H1?tp);&-BXgb~mC=AA@RsMXO zrCP9k87J?xNc@xU@nN@nsQe3Q`G>#2bUH6 zY^2N89|$JU3>1+Qt}UC>^CM!AusdZqAUU#bZiKw{3wlRjBgzEg*8#TMJY%{y;xE(n z3#eZ(l?Vgzf|q$egZ8+vp&b2T8SpMqwK3HcL3pWob_yB^p$0~k#6-V~#`QDWPB_8;|qv z?Wzyp=l6Ozo>HR3(qDQa2O|U1mVVQv^sz1fV#p(DF;txTG+`WeYOwf=%}Z#8oT?fF zObqh(A=NVmm>Q6Y6g9i|`9e73{knrRq=|I5e*O7LCyc_W+NFT}BK*GGDZdraK$+2hq=bk(#n%{o`({F%Bx`UZedmIAvFDr1<6WxEp)I{0(!7HgI?7WfG*NV4$V z;1ngG2>%JKaUr@KtLlI!?IGyIZz55HZc|CeJPC(G6L$ zjdta$zGu(`Wx!ES)8O!;9`+0~7P=Jgw!~zb-(vHpS2P>V)hJkM)Hb}B`qG79Wnk1$ zqZ2qs<{@-far&~1k((ujtdD`rC(P30=S#ka!yF6P7vGXoQQ6#RAf)1(Zf<|~0wgk3 z$QT3#l=ENS{txpEgqbx{&4gW$ssD|<`lsSspPwQ)U10FHL+}K-1>|{-qo;K=cugi(r){Yeu5^qrLJPw zB*3HHY^odl)5%Ti#@GGqnmyRDO-M0F_@(ry!zju$?l=;kM-Bz9EE;gSFeM7<24HPS z{jz;nSoHPbVk;;RM2wO|wRmX*5DA?c2-mK1@f3h5iF#@c-S#~()TVqyQ5ix#oEXZ6 ziwe_Xsy|CKo#+Isc=;+)Wsaa{H~J%262oy64#}8jRMLy{tO56n#dK7RK(V-bHW>$& zaZ4w%tVOJo6!;`9R==Di3nhdv0t&Yk^RsXKtN<*?wp_SqP@de0;7r?v0n!y zhm!CI_9r{7Ac3jMZuR?3d;LJGoyraG_K82&c_K403^B98-H-dHLADh%40`36n3F~X zj>cHULnt1I=eYD4yXY&f_?fHRSYmv4mRb6dm2A!{u0|@mX)6obKR(zK09|}9D7=ha z_dhQ`8zrabW-~kHRRXK2QHND=H zr(s3-tK+s5W&DpLcfMAw9(<@XXOTKPt?hY9eZ5mzrLCq?(| zEP^Y_;|)J2F@4PpAXG4StCY_Mx7@moKP0DA4h!OOTC(=KvSX(-N|-Cb*q#iYmM9W| zXQIORQ6{yCp$RR4Pg44c$MvH_64?loiVL1|pDsHOQWgB6zqsXT&+1Pisy`FfGZ#Vu zc@U6m%Y9GSk(F|jC4qX)3rpay_pbE6qraE~;5fFpyvxDFIlCvqS>py_td ze{+HJL6P42A{(&ln?4e<3%ereiLoEHx{SLabmr4<^ zyu)Xh?lCG;oFY8eG4#f?4o|@x1mjp>4prpmvgCwAMW-*`2xJ+z%aQv33TfOJLtIPm zaVwn{ajVKUfIgUkvP5q`Ar^Tv{ygF8fPNa!`)!B*Yr58X6k0~`kYM)9Y8(_HR6mQ!R$mJ#I zT>^5tWP((0)$qHQeYpQ*XKS^o5Wn&1tbs$c62+EZ04TvV9Hm64{A2J4SHj*-q0?^6 zksp8y7Qng?kzp1UhP+{3nG|4I9uSWYzKj2IVJe?ez-cq)Yz$#_N)Zd*ZXf}%xA3s4 z3i{^5V-e-$@?=Db^5n+AlpiA=+;i5%V;lAnl@16)c<82^hXkVtmA3t+CQatT!{BB< zfAt{<1;FOk^o@dwjKD3B!@~i7K^cOHXYC{LJmn+}&^igaFl(U}k~$qe4~TFSQ*ct( z{lWM=@w=w_Z3nISk?Ft&Tt#L7er1r^31*e}L(m-mZ)~_xig{F+rgWNje{&r)3nn4} z;(*?Jron57qx02uwRio=@8&T}FdAU*eX6<$1c>8eqxd}romGC~gP8->;f{rVi1bG+ zCMxgKLq8<9W(XxGB%hmz_;FBSP$m3q)4rnLEG!mplSC(FA)jFluL3`T3I&lkO6)VI zi>^~)m#2OHg_+Tk#A3_OtF)VKQkQADrhgu)&*GA4%!Z}y5iqi`#rgl-?f&!mAO9RD z$NvB^Q}I)Q9f2uQ<5Ph-0K&)xn8+EN0a=rW$9B(mDDo-xuk`(4SlQk#>Nm)3l9W{| zQ-U`4`8>HlcS%OnjRC#cedB5#)K zr=acl$f#MwR6{60?w1$Y&=e3@QpHY2Fw!XTlD~5du!Kw7yF9h4&ppV?>inILw%@5a z*X?o~A{;y&_WnH108CbA(k~0#$UE_O{89V~)7@ldFZF5bHcK%LiuM>K9R=dtR~Fs* zXPf+^(>%QTd>G2VRBKCuGZ9ZpYBuO8cOD}~u^rfq1k@T~!+h4!qgUA?lQyhnKZo@T z%R1cY9yyKBhz;(eq>KDTbC>Yf28Ov66|D+T8>WToKt*+!0kQtNZBU`kdkrD-ZoV(R z^zIq8t$PV#^qxmgVg_LJ3*eXB&-H(?4F4K#RM<9VhuKK=r6@CtJd+1J6aP8J3R6qf z8z}JL9}Z2-~=6-FhlKBvw!vLVrf zyZyfVH`k>s3n&-IUT|tlR+mwDoGIwG<@upeTe{H{XlOXx6LL^T_y!w8UMImRDB3>c zP%{e8?oIQD2j1t8?z?!#v6^c{5FU&Jd~YU#Ui z^}!dV`X2O5l?G852$YJ%%3ABv8B$Hr zZQyn$(8AF!CO$mo`fr0R7&wNl7d=}cG<5ZW*RBMgIooK zHPzYh0i5g5M`eG(qZbA)5){ueDh)(|B%mbJoIiE6X!*KS(DN-?o4=3q4ie59(i#<2 z1;8439eEoDw3`9fmC)VUfV#8M`-csB!3OsY>*mZRLg;L)7kWhKz@l<`HDx0%2I~m#59q9I7H+!$+IOMpc(}AquKNVYkX~S z0?6V()LWZquVbT>;b12Jv!jWLeqq*T6r|x6QntP-|PonIF92Z_6KbDrG{ z%z~Ozz>kH@aqwQ%u_8P3(n2$0yWg)%4JPDA@o(2XJ^+2c)(tV4n3m}&4Tfvx@DYO^%8uoIy(gF4tp2lgxdT^V8N8EOPTdbTIB1S=rh-xd?vI z-~S-cAO4nwRNzTR&sQ}3=%xJRQxA92PY+!WWQp&gCIV;w_TD^h<<}w5m`_K{nl%v7 zK@P&TV0Bu6{&$mIoges)H227BV?jJb<9GCT+QFmFS{Dw1_eSHa;{KgUU|r7%_}h7t z9mBDV>q$#@@CI7eBDN}@fF|j;?i)wzKBF=_j*w~2rAt}pldj`Az~E!F!Sg5 
z;)$?)6AH8al{`bZU6l#D9ijBXw*8uU_4Gv%vMR;FEgsS0niKdYxE?S3dsY)$Hz4DM zAT@wE^Z$we|6lm$`Y(tHm^rmC?jH}iT2s^Mpbg3QrPkm$TwW713+$|#cg<_c)0#Ii zsY$hbV?CpQ%4#@C)Jl@Ng#*C94df1!;>O}pFNnJx-6N0&(F@M%?k2Q-b9KIdg&+*^ zPkTjzU&Rh=oHQg25uXqwgIBfE&yY_#e-yw}GvMapxwg~OtEopHq-L0S6WIZv1X$NF z0Cc||pF=6J7WY+9`jHJx%epH6IIIr8R7Nsjz?oV0fe>-UbF+UIMFrRZ?j~zlH3s(; zfXRr%10@<=2>b+P2P`mX8NLlobMFK7k;%NNKNi9*z)i62B$11!{J}9IhKNIeDQF!j z>@L;V)?sYi3py)V&BG?Z=(hp3Iw!f5iKaU`^nzoI+gF!)+sCfI@#ryhLOrruRIgl@ z-%^bgPpn&;8+!4|vHZWtVh0au=zet)%WM`xt~Xs|jKTUCX$MTE&8s)$it#mo#q0xoB58GTpq{)SRfP#l8Rh>zSl- zqNF-jbF+SDX?X{5avUhhYsl{8f^Kt}70!Sw>qK=b?P&P`Qj_wgY%L*|W0>PS8ro%r z>AcCYY^R>i8}6|qd*#%YXIO9NU0}yZ3?9pUAp##xQG-xJytflCtxde^ z3?FW<3ulR~-bpyfgf8AV@9UeL<)t2-`y*jVz-YY*;-A|HV^+dW(v0muBorcM`|BIJ ze3X5A$>ST)(Py?QL;mzp6h$by|ElnDSjAxPQ+^mQgmm$tG!Qc5q{6?}*5-kuP{Or` z3N0<{|Cj3yYG~Q=fD2ik=2Wtv+

+Z7|==^83dSzB$;Q$r%35PAOA0m=;;0yo&}<^a^V%(u}LX_wH+kZpP$L-q#mdVDJw#c1&;}$ zcS^>Yl#<>V%vqQ?KY$y7y>`s(3t_zM;UzgBcg#ilH}GvZA#$&tKf{+NGf$`t3!0>K zKX53iy}Dgq4OQTcnWb+&>+A4lt82Rx7H;XY`6Y7+VGOeD>NYTPP)TfWmJe-tuWM7C zL&yeTwQ*ZGe3z>w?StHVT3uTX>~YKpJ7zE8h5jQmv#MS-R|nNP8tYQxgL{Qsx!3gt zt5-X5UH~6=xn^De*1~z#aHEdLnd`^p7&bS=mL+FuX@P<^>5K!@{EEN^vQ(hdgwE!nz>o7NL9=gsu9f7@lj~$5A*D zi^l>RO=2K{Gk;ri*@10iS+Rgra|we)k8u9t~^!o4~>$XQ_&X+E^MH;WE^s#b4ykgp) zBFXzpeje*7`mbrF%I7Y$VS02uFCq!}u%!D7#^j^RAk%vdOIT*k8lewOSjU8#r14SU zhuIv6LdEt_kc)FL3{?T~o1BR(+R(-LcDwDZ4a&!&-GqOv4MS| z;mUza+VC-EI?BlvK9~9?NIqU95hTIw(~=j-*Zdzblo=uvSZH<8f22{T6> zIIyet?RGn|$=HrNZH5i8+bO&a>b7?5qDz#e#r6Jhf$_!OIr6X9tm_gAQ-)|98zqWa zzhQ&7^GF7qL9z7EWeCp$ZaOx=9l@EAyAA&~9(37kj&Q9%ZS7>c2BHALICbhAH`548 zj+AxCp2?!t8RJfN7GavebAOMIBZA6o%>A*J*x@fwgDd2s5D81;u5V)par76@g>4syTA`*wgF< z>n|#p!S<5ox6_U?XA{&{cd@Arw$!0{V!%?%IdG3kM4=QJzV+s}gov94-Dazf-_QX=rfrX3$P zBVsUO2zCzDFf3g-)TJIatANKC?WD~zaM;CDPe5`NyT0;AVyJ-H`hx1z)&X@-PdZs8 z>rUHlJwd1(E@MTJC2Nw=P*_b=2|tv(X+m;kkmW(wZ_y+R$bzu&ZKKbXv_4Ym!Z+m6 z3;IsOG16ZE+Cow*7&uOn=n6M%(2fqI&lu4yy8xbjX7jgP}!omz@^Z{JYu3BHC^^jEZo zGpUZ|gTlu(bM!~ukq~tory}Anx43K+_kbA6PPERnXQWB@4H?=(JtM5A0X%ZVl=IaR zZb03sz-+nky-v)B$~%HA}4_&pv_aWj#S)InUug{Y83GCBNNF*BwWBHf}& z`i)g)1kvEKp~g!wuclI=)679yXVrlt+7u5)vxulMwGum;zg6Gk@sG_H2}{De0D$BR zgC&7qUX@d=GYKs6n1MR)c18@`PD%<@=idVp9Tf}Fu+yW00pce?KWV|(zs6{#(UKKB zGxOuU0glca&#ib>Ru$U!92Y$jz8>FZFV{``Tp2muJoQ>Zpdh&@LBMHDAeN(G>Z=q4 z0(%C(k{Lj=c-{y+7Oh_!&EeBhTmXz*9&V5=5mG{WX=Z;B+;PYkVW{lNwZO@4NvHEn z^I8gk{N@<5E)ys$|*4vt3rYUlW-3^$MH!jQlnS<3TKu9FY1 zPTv;EsqI7DJ#ceu)HPZCo!qebx+TpxCL8Y>p-pJPeaXT@9-w3qcSXx-G62=9&%;6D zM(PbWu6exLGtBA^;3MP;S3SMbWpqEQ&_&l4*D#-kNd^`Y16Nkr@aj0~ zaqAaH+5m5R`abko_V2}tkj+85`WIL)c-_uZbM$}2qf3BUJ%Ij!gi}_YY-_1dFaP-y z49qBD%?qEj>`oq7>JUDy3XpqIzfN^GJllxd*#U|s0s==c2m+lp$SzGBZw@JulSR6G z9i}u#Fj;@;5~l6b>*?0nfFq#YG7=dAy`aKqJm8EGvpqqXiPQ`xn^_iULw8Ejg&RlK^z z3Q-pdSKOnNwAZ~c2>8PD1kGN+JYI1lufC*95>`-Wt->=&1wJ{$6A8wI3!swvcoOyUzxE=Al8Xc7 zMN$Ma(R{Ivz!wO|lSP`xbdV!p7DEkb?WEBXk6vZKrKQ=HSqnvWt>4x$opA#CQTtwT zItJWmBg>q&wZ#&|1&qAB#_dC!C4|8^O7@Z+al-jKKlxD#fFR_`COcGYI{k-XX%@dVw zqFug*25Zxy#dqE150Cd682dnqJUDW8cA!Js%hCc8Dsl2QVFedsrKCeeT)pQ2=)LET%NxPg5Y9Pa3C7sb+~GA#mP+Uh1~M zvk+;nFPa$tOkWco2>`YV=El$D47->kq7-85qXGIiVs{^jBvXyPVcyaw;zWpzFqwf9CzQj}TGB%Qhpl%C%q-ZtcJsz| z$F^;DY}-!9w%*vbI<{>m9ot67w)OYk-?=;cZr!bmRjaDzoMSx0S%I3%d@ntnYtaxK z#{B_nW-U-effC0Ig8d?cCt-B+uS)^Ih+5Jwxj{uFX8sql9HruOfh!UF(qH%jM?3%W z{rY0`WWE5K43eX->{ zzrPpJHK5^}cBVKFtEkmSHKmL93lL3?j?+5@GrOu)B4c#b}! 
zua+zc6Tul(tUi_@f?Io+xX-|Ut00`=1=zwhiWDNU61e%wB^Ni!zpPSlc%P$!E+RCx^Yv@G+a4hiRhK2KkW7o+L+| z*dY+S_AEF&1j%94P9EqX4-p8>1fD!<1e~aO|1a{lHa}&v>DyImyPG-{}udiSyZq-;)N)NnL{EVy$ z{8KW0C2~oiTh=Q#DKaVe&yul4`T`9Aa`L0(3zl~V=C1obgWRly$QE8q|FJZ<%Ht2KfPD%WMoi7bsIeote24PcbS0Gy+ijW7@|p$K7A3CTb#JGWc<2-u&B_*0=v4UO!fy;L`%lpcfa0*R=7H4 zNrl<=yGRb)Db*FV#|efFxL0g%M&z^W2pN%Uut=iE*!qd`PpL=c?1d$U+zIhwt3sDq zL^b%gE|daKEdOu%N2M>7;<)nbr z0kQ>7?AWv|UGR(N!lD+xdkpluUZpC+X(>`L=A6(Ra~k+m5~k77#ziMwA0(>MwaBug z=)PuC-?N^3yWX25i^l$Lgd70t^&aImy7fNVKVGL4I-u2Qxn$s0C+P3s7z@7y*CC`o&=XKY(wOmz{m`8~QjLU#{nDELNOt@Ltv@Vuq|LyK> zi8a7-90?(LSh&PTb5rw`Z`YnPjB^pZ_}%f82v0gu8cHnI51UW%^nO^#n}$s~&O+fT z5Z-ZEAbNz276Pr-(^;+_{yZG?9KV85zqEX7q*#hADh3Fhit`6{rPOC}ONKU1jtaG1 zWZxH4GK2U~#r%|`Z@P!5flVbfO<>XY82udFuAz6x6oThcb*)`W%kh#>)_v}6M5GA7 zJ>8T!=B9uB?$1-r^rEGToP%{hhL5a5=k>@VY6agpeQl}bZ(M$<*8mna)m8+ih&+ccoM>mBp`Hz+0>0?O z3e9tg*u^(v_E9g-$0^>Jo$E1W2#2hv3!<3eV3<_d&}a=*6gi~XRtL6A%H#su9`5A1 zEfx`mZLBz(eT~m*uQ9me(t5kRXDoJ2E^eW}BJ^hfYw(!+6~U#6SVkb6I|;iaXE z>hq}QMMHrT4Gcn2=F0H zeO)|&10Lfxu09~Zy!DlLdj==0Q1_;=_KyD%$^3yTS=3oH&o&i(w_FRtkS>1m3tE5m zocVmGyLxuxEzN~P(qg4Df*|8_`eI~$H(#pTx{8&YLW~O`%5gnoqrhjA;c(BZKP_rE zw_GG(r3~D9lv~}`jf!1J^b*+cUKTd;bm4)Tss{s59Ujr!2?F1JGwu3ErrSw;c}nyf zHTZz$W#*arey>|FGq)9c0*8ec8~3`YQQ6cCq~uKghYE)csG`Li9+^|}_Qni*WpD)4 zH=GH-|Gq14@Y>7!@W4!t-Q3EPd@S&q?;SOZ4+=)+v#t3i_$(JF zpH76nWEgjS8GdS@Lg_-wzPb_3;B@khWc4Ir<{h zF@}RR1pBg|=;(*$gdfB)bq=t{1J1DuYm~{7g?u5hYF_m68VG6V*jR`~GV^@xC5c3t z;|ZBY%!s`{ZgZsac!Wn;ftDUvtsLuoU@=w58VQkt2y)r|JO)f)Pz$?pJ90ZflU`QK ze30Ls7)CsC9IT#uwwWeU=85DK!h^e6oDYsnST<=7`@waN=ZsV~J8GH$m)y?=6ug9sXZ`tGa#DmuAJ18^vpqd z<+Fj8;0kdYkNYbcYc|TC11_S#EYh_F*O3AW*W0bb5;6@W)t`>!t)$)(RD(2K} z5w*0^dsb&p@`Sk-EHwJ_COE(5Y6C`W+HGsHf(XSI2KFqu0q=T9UDtf`T9I{ESy=%A zI&|x1x8FAsF+RJsQ`f3()~?SrQ8T)(Vmsp?Z;_a;@C+OJ?l^_nsWh3dYL^a5QdIy>EhqTImx=wddqJz_ljBi*#jG^ zpfNPsSO(-suhI7?#0+0D=V-9PvYD|wrrQ^1p;wL|U;1?SnnQHGWPDJCh5{>*Xmm!k z4JQymg*ii4LhEq~UD1FGplCd-(K*gSbT9q(I@mh9f&Nt>d_qK!hFX;&q2jfk?N%Qq~jxbwLS7_vVpLX>B7BNi<9*Yl}{ zEdr0UQDPr90Z#p;M<*o7^K8|!Y?7d*CFsHE!zbeYkTzDxp*YY?FuZA*4hx%*B*%D6oc;y zfu=q-n+jYVb5O@0*V8u)EL2gAr3B70MFED{%Dc(ps-~45|qD&teof(>V7I$XrU}3-a!J)~FpS zpilR~#IKnBe2_=}-edlv16uAU#R;|sa9ZUOFtt3@^Lt(97}#!i>FxyI&+-G^a5@?v z!?JG3^BM)eHTMTAcb0*bSJZ(z2HU!%a1VYOTTzdXM^9%{ZygF)pOv}G_B-8P4X?=4 zX!=yE<@5%5SsEG^=XDQmxtm~kGe3EELH1OQLBGHpo^W@-#sH#VWUWm4(Frlk^IBN0 zag2Zh+&k<9WSidMw-<}4WV8~W6-n{0qs0??Ji!7pyT|F_TV;lzt?AY8r{4t&k zy}P3(NwFTPN|sSj$|}#qfhYf`El# z*?62~D!p7(%1Pl?(4u9Rtz-a0W$vh~xIiai?`++#J`;=2OVld4ygmTuxpi;jo{?S(6zjK_UGmg|aaLvovbQ zJfKoKO{R{NIG<~FBA`xLjTIF^M@k~2&jWy+k$eH$??n9)*stO~qCrwUz&GQj->=q~ z`*X64%?<3jSX;MPUfp$1bOdz34Cb0<>%?#?=-?wN#7euqkbwXR1MKx)?||>+}X`1@|jRb`<03010r-LrWtObST>DoMi%5< zFjh8BCNta`>?vS^h%QD(yU}WOg+~1tG{&P^OL_8AbQWR zwi>YXGV-;3+fr!8TgX3As#{KjI)$2DDBG8pdlE*;}xpGRm}%rjwC z`L;WygGVvf6Co`J!w)&kLeAqE%1T!Bu9?*|XwSOOYvrFCZQ?eT)Oj)HqxDneD9_l( zR+=WS>A+y5Utj&CIb+^Ay&z5KC~*Hyh_s**07{Z0&A(v__x~Xx(yIFbilD`$Y2W<- zE?`1^5A%%|7;?;K9e;8$pDXuhyu|-(TcARQf zXf&osv3xW!MyrE5pMRNeaEUCNQ(=&6Yb|f%zgOnyYAsy8CxD#z zascILw#wFkh|mRSYDH_g_@Y8V$3cEQC#DMcD70=#G}QZd1O z6oiOC27^ACG$G$V9qv;Bjk14{+F+VIcK*5AHwf(ndPuz1Lh6fMgX?3?TY&Vv7kayZ zli9zPW+3J74bX5e;gY@9dA4cg{e~((VGoIZ1mWoavL(U%YpPdF4Oaw+;pfuTlJ#Vg^M z9@Lf>0Ss@wFWl@PA6q(a&!xI)JiiB)scp@f`tTf_Xplg*D=zdE`EyD)MF`rYv0V>` zRgNuntD$Re3nHT^hyWc|nusIWh*?ypf5Zxh|AA8%IiLbNE~Br?O$mk=NJG+y2kPp7 zRPT?Qy)uo%jqzTUf-Y;{Ije_c|24wNQ9{ih+eGm3JfyP7=5;(&47vji2Scdh zuc$G!cZYD+1g8oQw_B}kdK=fgqtvH2Tn33tRiPC$WYQWyj2?*61UU(=zt>zj z{eCA4oOtJ#0~zX@V<#tzx4)7hUrdvpDBYkDsa|P)D)wikK)Y}7Jr$j^G_cyu&!74@ 
z>OublO>a=c_<0ZcvF}Tbg&FAbBx`^}M3(o1&{-mn{{qN!j6f|s-ddd!w%{JeQH;zv zBC{f(GT@Bu=R1(tP7i})|y_Tq>^7G!(?bl`HBTP8gLWb zF{wy#Elhu4EyT(}D#(Sp?DH?7waEEG^RZ>CV1RohoxPZ^!eNW4h-=9f$IC2nM`qt7 zNCWqamw_xMf#;UW#dfHmjwO?Pl&blr2DH(_z0a>XB|Qj3bH}jh$zl6Fn=i}C5lT9f z^_q{=KN_hq{h&NW_8W_`3c_5DT zOz`>_Fcy&ihNaD)RZn`tk1{N-zsXNA*%rD-c>$hZ7M!X~2V-{@Es&bYR!G>qbo%qT zbGbeE0<_8M5|6Y#a(gZ<*pb##H~dfGNuJG~_b+Y3a$u`2X~~FY-LnI~e%jApfts`r zNa%EgE2ffXWRRU(o_0PkLlka!&?2pp^_1WFel}li>~$RuvVev3W7n=iD3gESJiyh^ zlmUlA0vPe_SyHu`UxiT>vstoAoK<&!p<(aLm!eQBdl_cWg5ib5f=MmT_ZR)tiBW>5 zo`6PHgvq?WXCMJd=Fyn!xkD2P9?)+)?cWYDo~*m0i1TZj@=7u4%+`Qy=y5siuQ6glB% zVXy}*h`QFmv}XjD^rT?fBTLHWSX?(hAN+F-ys-gG5;V&$ANY0scV46<9R*eV{bD5w znCP^yAIv%l!>e}H8sg%q`8*~3*5zB}e-Qg)xhEVHb$I=?98~X-*+*tO<4sxnC>pD4 zA@pl?IjY!+;@Uj`@`XMolb&D3Ym+fKTs;9O_Ce>Qf;pIhXKbDg1^20faX0BT+z-lf z>$7p1Gha4oHIeKyZ>`<0TqEy?%Yk---k24O{wKl+>^$97Y&q2CJ=!k0ur0=CUBCK( zw41=*qRDhQ9n)dAG$&bxkCy01e^&YgYBUB#qcn51JjBCs++&LtT@2gpWotSpP&pGQ z+3)G+D%RMVUn(?7j%QOLQr1~fe)qAuhZ<=#Q zkWnCpA18G|<;rl}`64p_MPC|(+nx|ON$@V9-s-Sl3(GnzP+`ZUNh3p?U`1t*Zp5aj zJb(TPqD>#c40%eMwZi4eN0hVWKXgo7T_9((&C&K(FATEbkFs-;!3tbL+B>|J}1J0FM+Z&6#Hw*-%d={1BPTIR&>LQ8mDWz1k_*LMC8tOf!A2V5n@4p#$=<8HXugv<9P$AE{Vx=!XOD;M7FI&I<3%#FzqRpw@McQEz&5 zR526jx~DAg@zw!to%%V1hg_?u@C_&&UWON6(2z^?M~{1!D1&+V{97X8skczn5S*Go ziA0MfXeN-B;(=6NHAMadBnXEYVQosx%{ds631tqDLwvX6k4O}L0-^vE4HJ%OLu;F% z$^EY6-oAPovjUiJ6r?K~Xr;Xee0`Lj%wSBuqP4@%dF_^Ve7%r;|H$UPq&zqnIdG8* zyz#@o^1=&B=xk7+5qtheve~?>bWOq7WP~TX_`*tbOl2*c_P|$2Z1D$4aqrX+H29XT zb%n;od+oCGLL~<-OpE|QY(?=>9?erLyG^b{gAG5k=M+bme_wH(+A9al6@3BEA&z`H#bct66km{E zXGNYrE^REO2HE^Y?;algkYaKRXJUOLc3jCK^hbmysqvyarrNo9PbwBIkKfl8Ae`k9 zWyU#deZ1*55;p_ZFieW$-_`maDK!)JbYK^fAx`e4#G(-7MRcqcE4)YWhcf71tdfuK%u#33R zQTelQ;Vr)~soF2DMh{#=j%t2gqRmFiUwUf>5ZMz~epg(t+V^ zrPvx&$GxL6Kw7S`dK$ul*ngW6Xi`j4Bh0D|3$IqN+jRkhG_(w{l?kYzTAf`Xhw6r5 zEgm_IV5WK$^AI9E{4NHb+Da{}D3jHEB|tl_$t-oYu8hZx9zebfNZ~~l`o?d1hFFA` z^13?q@!NY(L8nyY`V6=H4UA9e6xjIWH7h6&cUyjJ6I5a(p?fb}cHP5H1+OTqAR{Lc zB#<(xZ+-*QRjW?fANzLW+}dI4#ZPS7L`!t7CPv?>q~Qa@TlB+f;I=F~iS6hfL#jz~bxdWu~V95Tlekt^&4jaN!x z;0FOGyoSEG2IK`LX4jQN`YNOp7pyWP+F`AqA9!M^IKs3KS04WGys`T6ZhZVs`SvWU zuYbjDe#^c14i4EU;Pb33FMRWt%fE`XrIW3dr_aUF7Vs3`qmqheW%F&iHW79mY&B}h zWrxp9I{te;BdHf_@yc7o?2kX&RNS8IDZ&Eek%olsYlFZ}L|M)iP%}}Mh2O)|rI>+! zR5YIVBa*Zp%&)S7Cet0bR}1zGWWPJPX$2$BD^3)eQxi{@xQ9X5f4w-k_#Ka8LZ%cc(MT>!T`w)JEj3UaVIFl7 zr->@^a8SGU-ssMAnwuIuD+)&MesLV;%;$2qaB(mJ6QqVyAC4`QY!;iB%#&Fw|Me0j z4!7zK_yVY4(3602t*humQ^tkK9v>T1Z2KyLNY*Rq9`)jjZMY%b=qypfQ3y=S+ z2EH2|D_u#rdU3LFF_iC*8n+l|TPkZszm|(Tldc^pjsZs)EEvS7`~pf+Gremae_L z<(QV!cLC#UOa4{z{f~2Hy~cEC z{g8H>>1-YrGI*Wu>OlKAh`-D>67D}Bh1yHMVC>tyy%;dCeor7hX@vO01USLBnp6Jb zjc3ZoIN$L^dveM9D*^?-BoNM+n4}b5=2x9gY;n~2%iu+t~u1WY7zk_}M6 zQ67~djzJRk?oY*M$QF)qk)M>%8Tg+VdH^!Q8UfXt1q4elkL8B zVGIsMqrjblTj6!kVr-*30(0dIMr=jRX)c z!K{psJ%fZK7E#V(Q2}p-2_Shf4@tbCna)9>tv3cWE!B$SM7Y$duRqBcF<<#A-TRgs zah_KF!X973`tDh8ni`-OBs{i@{~Q4w*OltPg&)ha+&{*r&S)wTQCA9wX6{+434q;? 
zv{6%+h_|$@grJ3&jJ|>Z;c;~hv1MhdJI{6SquNl)H9IoB7T|6D20HUhx+QL`O56G* z?nPLHqZ(_!ewsS_Q@1Fo2isPNqXs+$(TwVb+4YDg>c(T6VuAOhZiGCam00YyU}@n+ z-Rj&Eq7QQ{XMSsVFhm!&ZdPiPk}6;-bB(@x#^eD++=U+cuMCL()I4|Q;V_W9--xc^ z4u@zE8n-f!2ADyQf>~}-wHvzg3R%|?I+@Yl$*;I`4K1Q)FR%->;AIuE$boe!`A+di zVWz*fNGzo|O5)q`FkeCaZKi87i;up9IFfs)U{A#P_}jcj<^Dy0kTb&gib6DZ$SxyB z+Wr>S7Sb<%x-wQ~`=!1V!>8Fw>l=sUuhXUb&mWpL6+k>%B)hhE$v8zt&-NOYUM7Vo z62{sQ)eIH@v=oI18eURt1o>$jXX>~+vB@jyEtZnrB)(&q< zz0G4(V6*EZJ`jcA2@`&O_=j5q&jep-oR$Kz6bOxualPN3Sx#UK;nA8cTA--VmJ+Xf z-w_4E32;(J@4ZJI31zbd1AD7;^z9n20#o#KR3z={Q{fM7DH&OE1D_vfKrVeqpJ~F< zJx#Ccna#WK%v&rQ(Udp3$54&G3%nU=eb-2pVqpjGjPd0l!h0Grp52?wh4!fPhm#$a z(FeKTW=vCgGKO4#7d5zR3Gye4^O+uHAFB3AzJ%E3c~BAL00J1&FAFj$`iErWMw-uc z4DsmU0^-0Hm`}e&Y5#s5`g^Bjc<7Ns8PZHjTcmz?r-GU~?qj*zlU|itIl+S)+%)r5 zKqhb(O#~^w3yDe7!a%fcn-xr~@}KecQsMPykV@2rf`i@ZE(q%5Bl+xPT45etK+Zx6 zC-^q$0GP{uSMG3+cK;tnCEl$@-82>dzdfVV_f2En==+X@%qGOk06mkGR6ZBl40w4F zw+|@EAw8@AQ4#}~|10ZHYjFalOY2z!$O0(R;MV^&nNyjjU`oprp;IQ83+)9hSi=4& zz>sIKk9A|}QDP@V4=&KWvb>@Fj+$bok1(fkyE;!lZ%Sw1VCJ-ZTm{Ip*GvEY13;V+ z&RPxVu-LARbgj4CHjMroP9xvMVjF$;>ccnIB)w1lBVU`s$N<|D^{E;l55I{B6A=xsH z6>XzvIiu7%OEa5VCQG4~K$mla(5PMkKfRvw6eD@SGR+-YIlZ&c&s z=Df<56cISZTG5f&@ztjFlSY!^b3-E+Y$dGf9w@`s-6fjweyuu>`o-h}j2hfFL(t4I zv?_3;JUjkRj!oSvYs_%q->BPQA@;H9#hTW2aLknr&vp+i7LE-&D;JK>YR@)$Kcrah z=IFbib+6RgJJM!Py)@1&kDAtng(If$d*is;Ll0wHd)%cb0Xh%tQfpL*1LSJt1%pCV z$gZx$5`)`+fuw*jFv2e6cZ)|t29lkX|M3Y~nEnf(N$cMMr~swhviL7ruj9iK$#vxP z*u0skBYBQ_pJ&+GkSuPjL_fW_@qZh@U^9s;7ke=2^`$0?ju9y*I$%RBc-}B1lTk#+ z3d-6~*A_H+>kX6b!3Vc{*N)40TRu4Pym^}{{Bwt5P?8;%CsCQ9|63RCgceL;X`KJu zp>lyUS7$uPqCjTEPBjP4H?X&@@f>iFUod;B z4}f$=*YaPFF4;E@omv@Z*c}zuYgkQ`Qu`I&Dshw~>iPQB4kmZ?hDLhkB?CcXT2-I; zx^7Q_?C3E?P;(dF_-rcSq*RpI5hKwhLaljY_WW1(Hoxhc<-em5a>}{9H3HFzlJ@ywxVUeG;#_yQe*=r<-~+F=yHnu(`Qj4F$0J~ZWa7BZ zTj9r6KM<+W5?qray|`?q$K^O%c+Kg%0m|I2Coj*SW@b2v>y&EQtemh*KvJcZTCSV^ zp04*f?$57HkQ|$%yg`xrzGiV}NdHf5L7Pc?{aYQyY-zDxiCg*FmeNXIHukWY%0@6j zo%hQ(j5M0}5sz0-lYMg?!I4h|{*-R(KP{o&B0wf{G5w{m`sb%Mv+F>;W4+7!Ka^wj z<}12YJyML*?_`D^*PgXY*#_lBD+|y;!s$2AB_KQRwoD>=GNz8Tzi9zugzWrtJMPeE zHqnu#_^ah1{N{T&glr5;;%O=Uac)y|R5P2=GoTJ>UF zCEzP7ieP-j$lV!hi_m%cXKue7OyCy%9)SLFEg+XXwCYL2B+}?EN=>7bf87vjc2VrJ z{`hO5$X-fv?AnFGOTs9uD^6FZ5y{XbXJfM4J(!y?hlr=e$m?2T5_DP*nCwG(-jr%# z0aw}ymU;L0e-V;NL7TLFcd-=<$(SHERuH!TX#rOVpzAMmu(SGIO$P^(zO3|wECgKL z5|Fq^f-JxKyQ$j9@*dnWHHHikux%_MbMf1oUEWMHnU%HmV_?`A0ozba5O)%k0i@1i z(_=3jF6}0RUh2eP-A{Wj-+CYrP8P(wfpIFIs6i41r`)H3F8+v_w7a zhL`zC6b88tKs`)yut*r5%|3W`JShd4bvPmL7qgrsvOU^uA+zl=sr_W4#1KZMSl!Q- z7XwHAGL@%OJ~I`hrn~~VbTTfMoIE@&(uBrp3>3tt{&Ix!6QssqCIfwlt_8*$%SID$ z-%IEcD_OwIj}>X)v*0K|xI;yz^5oXa=4v7)FhiXHk91>V3t^z=BsE*3KiKA0FWp*h zenazah@$sQTCE_+`1Dpk@MD}PP3ra$Eh)nyfU3&n{X-EV!YLtu*?fn7bn!B6h(T&g z9uM04I9i+rJHtUNW3jMY;Q}W!1BOgT4)O!QKWNq(Be$3;CFfJo|7qIB$7?5<-V-CC z|24R{CZ+R{z;LavKE75iFXM?2w=!E2p+P%q=B}g}C(i&gJi4O-pqSbn-!f17Eq#MA zH8lG%3qk3|c(6Ml>?jGBB2bf1QBYiDF6FZckp0zX#08i_l#)*bE9$ey+&8K?+^=SaSVuoJinr+)?HRA8Z`hTip?p^JHvE$VTjP72 zAhaglhHY=Rbywut*Ypj~%Ma1&ZFV4N)dkMzQh7Z1cel^p#vAd*wM}Xw-c)w*J3ae9oo5p!NNwCy0-3KxEW&?$cstaaZ5_IUx2Yw$W49MSR%(O>Ksj*IJjq(1Qql!xndRe}Z~Gn#PGf z4N6IpuN#?dT?~<|yUJPLwri(tjtx0g$olAj7_--dV-Hnf>5^z&TuFAIsoU#P^(QEH zmLH<%cL@VM%uG0Y68X|mNGkqaI9Y)wBxabx@(ipnWaDsC)-DlyRIJu(l~*ow2Sonf z?mU+g{ybed$9>O>(W0Er%ybjVg9Y z!SX{4Rtu?oXXpwc-lumQBM>Bg!6kfU7Z+Q%IZLFDEtT|58_;C z=mv_OQz2~eDXaG5%Mz~p3`(BhV#5?OM{@DPk??M~tLW)FDt*;M0ehJ3OFY_pFnB>#yl#SBy#!z|T3Cbg+40mPhO=to?-&G3(?BJx zdZXK*iU^y>7u}BIn-(U$^)KD_o#EIGWo33bKLbfh6TTEDjtDDLQh00s!cY*XyMwm#Dh zsOwt42T|h|oYleT@ZU7#5cFpXZ)+Wil<}^2+{-D#ELW{eHJ!I*+naRU$X!reyy|N4 
zV$qye*}%Vr4==&C8cYrry`_+*)u_bKKb0Acb~k(LXyRP*3X&~N7;KZFlqQ2!{Yp!% zj(NMPj;Sx7!bb)zaye_R zHB9q~fBZLculs=g5#%=7q5(&VVm`{O+v*!{nm`91h}%`zM3?1N5x;m}j|@nkp!u zijUZx@yR6jvOGdR)l&Engg5f8j*&x5!rrjJje-ZU%IA?qvzrg>2?9O`spiDEve|4d zdS8V0HhK$rPNyJ;{ojuIpE0t;?2nst;N7gdmPEv{q>mWswuA3OeJW0Sv<3tROqaSU z>9fI|D=OGWIOV7mUfdkk6+sK1%P7U>*2rskSns2)M2nez#p>IK9BC@iWv1PFJCFRQ z+1MYrZH|~Jo^vYD5jcs?du6d!opr!=ZT^Eog8yPwgRidv@cHua zkMdRI*$I8yDoTBKd(Z8$l7)n3elBCR9Y{|cPGsaqq3nRHlhe7kd#Ch2G0Nv9Et7KO z&#X->|8{^91I$&t;nv4$C~tD z>kovk>!|4-K@h{$EKWJM)oLg#V3Z~-ym`x%#?DSp?^7#v7-if8OM_Sk*kAStw`>u= z7hE-sxYp{i%Zr($PA&bWtg5k3gd5PE1&N+g!K7=cAB~!2zleRxWZ*t6gRi+U2(Wf+ z_9}?UY}2p_$y8VK6{R2Qz4V2-m|diyO$LqI(EV75McMsLXC$BncLKh1kO$(# z$~^;-g(-{^C!zPc9}OUMJKF8heP8=IoP{OPexW-K_hJZ`ad~+}EO%Esoo8&MO|yVE zdKj-KZ?xl9HeOjoTGLl`0RbS_@|=S6666IF54R@vStG}VaiOQCdomA(AKbpdCS?bB z$Ugjm>olPqhscCK_g1G<^W1Mo>xcQ8tRR=&^Q6@U$3YO%`>kOBwUT?g zF}`JKA$&+!LjRA;70kP!pFf^a)#>!ZVWRuHKMz>H3sA##xV29GMp=ppbZ|L7)@c<6d zK_#`EO+F(XDjhRo%=gcf!0XlKN<}I8CS^+KT@w>3_ROD5>?=Ke++0(t1tG+Xkx$yb zFU4J2F~gCx-{_HFL5u@)z;tS=sHmtq-z-j}zm*7-hhDEJaPY^PGF&YX%X*Uob@WV3 zPIxrDSa+0ai?ztylW?8r8HokwlPl zC>>x`TyBynLND)mYJ0r8_leK@*)hO@XL{99v}T>H8_1BJgw7&GX}HgTRG`rzmFieG zcw4H3DfM_^yqNTjRz?y@R3Nf*zCsiozDS%9hU(xh&u@6GSft%&(kK^LoIaJlK-U+M zzQnZpo*TgVxd8|U+ZB@`jGfEn+qJ*gH&&g6G4}=p#`PF!AIT*{O<6i+c2k})f0-r- zZmPP`j7a!hHz3&f$^qaU1~kD4-Q^bjZJovt*{jGy2#(bs;Zy*TewQVYI^qa%BMkS2 zUV|4%twBn;pvFE?yJtH_R6o%vwvAqS_iA|3=ATu}kt&Y?cQcLsz1}f)!yg^8a5_~1 zt3zBg?IQh}|1E1xV!81zVwRHEnB!gR2m5wSBMmLHgj_7*%f5Pj4KN#wQBP1jfzZEF zSU81Q3*&LK6LH4ih&xm<92%*vX)^0|ycf;PxITVPlO{|47+X~gtpyg?8!R8y7C)8A zGwE}6d9+ytzYklb#PXRulj-P-NtgA*V#$U|lC5LDeb0b}%bG}C`saq#tN#qRV#32h zM{LPI>fJwdNPvKPEVl&R$D($K{)zEQVUi<>ND zEdq*N2QR717*Lrfi?fbkfUlm4y{cyeG;B(vJLMk}N zk#;JRGpu#N&upy)!Ln9S`&m2Pc;4>@k|X`1+}J6*fY--%4V6K0!v!}qBizm8O|}rM zz+(|z7FACP>~H@HOUsh&*u~H-%+4yKN&b$s+S+H0^bzTR!|ns_~i@IE`|~+tTi__zpY*gq=dRH9;%fPGB+1Pf7SXw(;G1D zc+pr`dji5C-|x(D{xB)1_E^oQr{^?5Bvc=Z$d>BUQ=b=SG1LmrFmOIk6%aAo_xrD@ zj%?x>7TAvwG^GQHTkc+a|D5Ixj|E5X`yj$;od!PL(p_pcjg;sVFcQ=#ov|Fc#XG^9 zV;Ge*(4&87(ZF|rJAgm!?H#}kL_tH5q2MN72}JBAfabr2*fGSHV9y zk$L6V;}|f!U4R<}7bB`{W^+bL&D05B>ZgyE&2sl3OSMIV*1YCrOJ&mo=0OnDg$xeY?u`pjT)U)=&&3D? 
zGgRw%ASKM>U9wmyNzyvLPpC*6K&26bXKe~9&K%h^NKLQ^9nZ;1JwJAby)3DQ66HUG z*g%n6_lTZ9mnb&;0S&mqm1=z%V#XXBJ#YFa6K0Lry$h6xA>YzJnwUDBl-$=SI&2k6 zlZE$k^phWX%z2;*7O4gra8Z5^tX*&3Of*(DA~B&fEmdGRaFQZWAR@0`%S>k{@6`=% z&*0fAl8#PuR&Mcp>yLkz5wxBPf~_I_T(+78i+LxV)(9WO^eL!!32KwoJ| zy9Pt^Hz~w2=6!%$;Dy9+!p;=7F=tC$+$AnxsGuTYu}JVtw*dfgpG*~Ixjp<5XyFf) z2AJtI6LeCFbuRWuvyv*K`TR}lXs_8y()g*Om=dWTtU*}BscG*YmvZ-nHMxF@=6jpjJT$@Ab|XQcB)p6npQ(5gtNZ~3fvYj?Ly~_6@mLR zQLXTf2~_r{8%xVN^xeY*)-%R-elv&%lvy)SO@X}9!lpK)@<Rax ztIDTmYTZ=d1@5jkkD*5xa1%8`FA&#TekaXs9)&?T!T0Nv#4cSr(w}KZm5q2w;qIMH zv3D?LBo6`62Ws~`qQGlZdeM|uZ{SDZLj0M0h>o`C%(ynVlUQV6jKXXgL0^-j^5cG0?RY}=a#xgnH@5#^_ITD1d>FBxS%5+dJbC)g>B^JhwGJArpYzEw# zr;d3KH-)YX_7q5sdU$>L0|(-snPe!M6~!Y!kmVUlU8s4EYGs)qYa}w-{fN3KmUrGy zGPnNvXO2CU5<6{|#B-oH!D1_=tk^Kz;PrPvv$p~GB_B!*`fpof)qH-+by9rEwR-qBl$d)uW><}w^H?O1O+ zmRhfqI*bu$xtOCC$dXKn5gpkcA zw);y$6e+XDSKJwl44uMDJX4!5jTUB6@Jc3^I3#MQ|EN8zl!1t_El<+AN6S8m2DA#+ zxI=z$+?BhIMV^tU0uq~2%~86t!W4%RCGgl+nRd*w<__ib%A-T*@JDT%nU?+>43eRO?>j|}nIB$PKnzThIG{RjH6U#(2oNi10 zfmJV9?h$MU)$T033q!sGM?%LNjXsp<=qs`3Epb@RpjE7KRH5|{%e+4NPUpz@aASBt zJ#Uw+O&xNWVXNKV#`_oX)UX=Bq>b zMU%1v>h9h!v}6?RI8$6d%PXj3Xvj>GW1EQ+&jMBxdKYq>?&1i#Bh;QViojwO5(JD)%| z8r9C-k;Xte*v7lYsKNe2Op5&yX6PNH`nlfx**wY*v);nR#|Nv0u1lAZKYgyVZ$Er{ z;SfKKFy3Dl9NOwh=sLfirmHYbylRx5aB;f?j~(qUegR=E)}gcESVpjiTpKRXtx^4k z9U{?RVpm5&YxnZkF79X z|7BQEMYM-KMNFh$4j=rHgM=yX`9p#a&+&DCa2iaOfg~qcn2ystjzK>HCPHZj6xJb? z{|!xE0i0Y)2q6qGmKnyl7{E)BgxyRW7$#aHHN@DYN}Jb$9ETKK&?0=TcWT!K;BxlIft3!wv?G<~s<2s5yqp^ynuV@QGq z&Rerxt`XmOBZr-%NbJO+s~hJxCS>)TV=O@3=FuY9qW`P^R5__2*Kx3k`SN`Py!zs} zilvIRvLn4bySCxPUzWi$WP-~UQ zmn~48L4BymRxF5ja^>rz-nIHgpF?Hq0*9UsBe6^06zZOUT#PPV@6W%2 z0gvBL1^eHJisxJ|*ZQ6DA$!MrP_(j5s7IRqb>X76VfK-P4mrOHT1#o&+u!K+{xEaN z1r4T$rt&`SapT9-{66s6U1-{98SP-$uC2VWjK@7K1E|U4dWX5DBn{MfiB&YRwf{F;8X<(3Ekq(21p~;hx95X&A7YL!A}k7tONf!!&kbW_Eyni5tZV zoD_8@0wC-R{7qkc=QBQ>H=0g5GaiM*uTApUtwOxHg9@RLJ475=@vPs1F@oqm+s|Sf zn;BwmMM;cObCE|9UrEf*EjJ76F=CJa#tyg+=oZp~^3bnz-L^} zWFYwQDK|SKQ~f9O5}wSY!;opc482^{UIezj0H0~SUh{b@|x z1d&s zDvG&Cc)pzPw#ZmME5XgdXGo8fGk^}3%g8LE0$HvsW z!|>S-i(ML%MHnnbBhjOa0Db(E4kEg2$2yQALDTY&SbkeDD0e`pbyq0jEkG7LMf8M6 z0@zU2-HXv`H7>pm=sJ~xq!p%xqjzg}{w&oQXOHcG1M+s9TcU`cN)rP3r$xw$z^c2x zAjBb0)N(RJVynU1dsYdQC#zuFx~c)Jm7Got%iPW5Aq0EJ9Nseqms!V|@y4g62uT}x z3}aPpsK*mu<@`I)uhsIeGk}Mym4pH#!=DQ&yo$KJjm9%|e9s`YLr9;Y(h7We=5QXO zXj1@@EKF?*k{xpjtm5#u`hrhJU(gs~VPKhpQQ$l_)y3^CTaiT+p%cy0z6SfxGQPm z1{sJp$82SoX-6t%ABwQQY+YXK5#nx=d(-o_zvQc*X`cA?L&EL0!s2+33H)soaQDn6yqE-R&Hj~@x++;rk9HSAdDji0v@;)X)IK4O1zXRg zZ$~Lqkp&1*kVud$`6u?4^A!_wwYYYRM=`_}#NLS^vdxL{3tmm(a3SGy-50!?WJ>P; zmNou=X#xm4QyOp+D0NybB8VjLLfQ)=2sa>Ha41nq9;i7saQXRYQiI^g;gxt#wnWOo zL8CRmSX?swM-DzN5Ya}>{oliurSz7-pIYjh8`4x>BkmLcj4?IYp$|pMikrPmXS3Q% zwkb=BDVY`?dAH3UUI9Lh251(XjlvE(x@lY2O3^ho0n7E}q^wO+fKO>}MLsYGq{=j~ z7e$Cyv;je}phcob=&4~DV~LVbG20yjVNeCS?y+^eF`{fA6-GcT)Nl|1P`vpMIp*a1OEk^(K*K}$6$tN4aRn3-HrP*!z59&9Aj z)3H@FMZW(e?BnWD7nVT38d|BffsDlrO$8=Ie1B6E&Ndq#c0RvLnGY$CgHW>5{y(STa50sc#8|5E)hcnPY~xHr4^z3<&@HHCCX7ixqtD z$r4elH6uZ7J%|IdLJJPqc;Nj-B`Era4r*$s0X04H{y{~NV0!c*f)AMqsFr5~D`I5+ zclgeJSvcn>5#`!vn6kOx7VD7Ml!^wgv63LainJ9j~X?k8@8Wl*WpabP2+ zbQ5w+`pR>4=1JhUszo(+?)X0a>)0>P&i%sHTEr0L&)y>8=6pZ%maEK zMJ(p*+a&89PHL_@lLlZUSJdd$6%okE>;iTwN|Ya-XiWi9CXZdP^Q=?mh$bkP#Q`V1*eLR%E_R#3UO6#brD*M0AP)dxqg;AHXN)}NO=*ZLQ@qxFaOzee0{x5 z6VU4lxS!rMzz|_xogy=7fQ|E#BMkv9?jMc~5(qH`>FCiRA!SnxMnsA-tCoEzz_@Pj z0v`+#JUgs8QT&o6U$SLRMh>>1B*MDB^=?{DRF^;FZ}l}~;R6@Ua6o=G*hixwr3?ctSQH2&eaYW(X9P(-qZ!1mP{$P^yNmMexrvQQo( zal@#WjK&FMPYfrGY$&Fln2V95lZU__>1O(fQw-%q;kmAFClgL#tj)tHLEaecXyoI< zlCo6y(A`lAhbY_G`lty0+8b+W1>|A1u5LKh&xs7edz$)1 
z#M%_K6<%!Di<%F#awxMIsWOCYC7#QA>PXpbWX{ z>D`7i!@9C3*2LVSp}*Z*%!vqo{8A#vn-_#`N1E? zsLR2>PVqUZxdHqpXsrlhS_@a^U2>(S&Ke3O`!6I9NbGLy!jLV?hTW^Twr03c_OO3} zkuxpBbAOMSe2kC|0~n(ZC*#sE`O)rz#m=L5SApPpk>Reltw^ZAoo^;Au!v;YPTNCR z^DEL1SbAm&7Hp~y1Q=U2Co9^CSB9+vOML3LZ(V_1unGoyX4WKX)`ZzXNJo=pn2o!o)hM1&o91;)HSqNt0>Un%LknaSiX6kjB;;kR zb`SSvbdPRaXr1b6f7##?ep#M$4pxnYJ7^SK-5ZID0lm=cxYGX_G|qDM6ZMN0#@*9})veV+5 zI=3@$d$&(!uJ8kPO*^Nn^7QpVXvsj|M>7NCA2a)*OLg;Np%#)aLriHGO_)o`n+>Va zhi1^o%s=&5yuj``IQ$O8SPn4V^&0)-l$P2_DK>3+0nAMXH3NY4W@pKPPQzRRNQuc{ zj72)pH4rR+pK{A{8iY1yt|RNU;8fHEDlvQ7KezTrM+sAOw~;Gv-A4q;oCkb8%>9Lc zff1MOT}zovj;H6KC%m(*@uAezwNYCAmLc)Z z_hbLyDUL)38u@f&Y&i@+RLUkszeIdg*`f+@bD2Tucx%O$VO6?RNo$@XDzftC`9@h3 zewO2Bjr@5D{l}qJht+8J>fP#|^5F%Ix2pT~zF)Q~fWyPDOoOeH5JU=C`#8BC?JT^) za-GOgYCvikrPbj#RKY{A!(9wr)-GjN&(Nz0iEWn0et>6emMskAH_Uj2kQAkaL%_#c z(Uv%lZ3x$&OxNokYKSM1559P_)O)$vi>(D{A92Z<6hUr*SbT`Y9`jWshq{}!8gOUa zU$h(|pLaqFQ$R#Asu0K3ZknJzc99R8qPRC53;AB|*WSONs(&Y?lWoZ~Q<-HBNoVCc zyYI3MpKwXp;=pD+y%I6J65Z*+lM;aWRoZH8Gae1)+s?PMLscwlS~$@ZujWmNsWt&> zcu*el5O!D}$gC%__=GzAg${Y%`^Ur))6_*>7NPS$oJ3%~cRVx*5V}rLABK~j6}?gJ zkGWGk9}(I2^|tY%tPgXyy#_Ibdf7La;ZDSy$QyNj(G-5vyMRg$RbVi%vS^CtKDVLT z)~Xo?9T0u=UvopmFa+#WBlR2mFaL~2OKM+1Ef{~&UJFu%Z!ywJcFruWRoc+7WC82n zM9XzlQqWG{V&>}|_GD*kXt?*ePK57vjTxK#bqI!L$feg7=jpO{A3Z$(APP@Yr#!g- z$X|xH8>vJ1gVJ>LpeKTewbTmtZ0|ANeoV4*QBo^BbaDk{aNo%AOTBmkr8*K&L17x?^!#cBirYlFeQQjEaSi<{SJVJ~rs zq-EUy@$2h)^!f4JDpP%=J(=&lXp(4UoL&>21pKcoJerR`yh=a`6*6z7a`!0k*^0n$ z{~-`F3qR3q=6f4etmxMG-|tf#lzpBiMkU2YX-mW=h%qDEGzyLixk&U|t8O1FGG!I;m9kgl`CPCtNrSDrw4 z18;}GF11!)dlOP6(A}Rw!U<_w-pOnSf=#Oi<9>pe5&YWp=HA zjl6wA@KMu2))*&@J)Vz8IDT^_B@2v|Wie`INsEDdkq^|mlqm4k29l}>bxBp|&R(2M zNeyp%-)Mc2Vl3iljrLnxu=*noH(k2XrQp(UR&+ZVGX^zSHKD&{N6Jw56)7ChPa7un zBuXz0(5K79TOi9agY_ZlX(wlQ#-6l6!@vmtDXPr_Oq(kZuSw)^^O6?C5`Q>h(RXZW zGwE(ByL@swyfSzh2V4M(Iy6f+#8w97e(lQa!B(`?!>oLqqot-OV*14fMK>X#*EjlnMFuD5QCDO=f!V>3yQ4KeBA33A!+}4z4$N)h z*c?m2u=8`scpzcMak_wVt;>b{4+4+B(FznYx%rxNxPw0>r&C;gzTWBB^(de*+y z)9MbIRfuV?OY(^OnLJH}m8-9rHz!4}Gk!b7W6XO&GX$)T0XwSx;06F2&%Z;kl|0Qy zdBS@y-T&C+^VHMT^v!!Y= z=amxkVet@5xJ(>as0qMI(jt%&0`RFv^SDv$zjb6JAZM25;q%ZI^lL3vxXZfO&)DIl zQDqh}wiGB|H@ll%|Scm6?!`|K7BvHCoV| z?a-&A!~#FX?ItZ`Dw$2iN0wVhj&YccJ#!=}H?9gRJ^_4vw0}-xFSfUSI0_ch(n$!_ zT71e!=0O;BSouf}&vD4y+v5H8pJ-KOyp;25u#=_WTd(T;y1e(9JhA zSg(8+6}kUA-U{`=8feAde$Niu8XD*NN}ZG#;m&vt68b829P1OQQauMFu+9v1@iuT8 zfRo}uaRr2ZIME=${cYAZs6pyml_(?Cd!V!<36|r(o|0vd=MhXp%e3Gpfl&YcqiHZQ zFyO9)`2lmo{Ax50e)2Z%#0+37( zX}51t{t$VJ|6CIKjNKczufvgC037hgTH6TO>et^>Clxnd+?>vpcKp@>KBs)Aoo@Jp zAb-GZ;cS~G6~~#AYwFVze`Vfv%aw3hR9dX7TAB_4OULzmBh<0%df#y2z@HUy4g5Oq zm3q8P+?DUFAg)G(*Sifx3zLWR8{xb{#9ZkN#d#$}-G1c?&jM9OWwUpJLTs!{ju87v zgC#e(%~H{wE8g#(nX=)lnaIL_9^)qI+qI z()=A}oCGbF2kos0S9nC4vuH6n4Mkk}wa8_^{Vz?FvF8y{|Ne9g&m$W8YliX;Z0!%< zeJmi-Zuz-Q50o=xCfjOof(Yh2iOe`Ax4_`5F94yN4E6s^Y~Vi&JM4`ApD#4+ffS?< zIH|RX41@+4@OzQTPZ<|~+@YHx%~mOe_oJx#j+~!^*OsRoLxZp@;7+-jKX8A$a<6rY zQcje*zUy&(H9DlJTXJ4W)NxH!w$SpebBee3*M{Pl!NNo9RAvj6JvPbju4Fk{U9wSS zss|r2ngUmp+Dq%OBKb&8&JK3k{^qjBAMcifiw#35;D-x9d`K`9Re___WT3#rM526g zN&ce}VBhbgs5`B~vkh2!2sR%S6&f%V;_SKKs>jimNZLG8flk~`NvebR(!O|=B96sgBmV2V$h?z9oM00PB0BIW zxTm5907}9j*>zVbmL*GUqGU6Z-%}xWkun>#-ulxRj>4UMS3qVn9;u?vyf)?0G3kyj zrm1Ot^T+I_U@g{&Zu70A^mVe%j6m%~sIRL9A8AxX+aiy#&-H*~L1KvuXV>x`{aqGm zBbtOUNH7~?!SmRMPex92S!Mwdw5Moe+t%3y;5kdVt7C^Xh?`L8-=vUb{JK9e3}Rw~Q@dEjwX_t`k>_V1sZ zLWfSYFh#C$PXw=9yS%Yj*WznC!)_Gv zv8o7q2qP^Ny9G{LHr=CYJBP;QHXcB6fId)!Ol}Z{NcA@>H$m1ZpT7@~}(Gv`A6_jVY+`8fkD z36I?G)OfsQyTBrDQf4U%qQ&;Zho^$#N+>UYCz@Lyep=77Xu|`N`=0`oFpBqpH_;tb8&U{U>9j0f?qUkE3)UJ8m_+5RTrPn87{Iz{) 
zMN&tx7Eg(WK7Gmen|$_4rMeT{?DFI%pTxR)P)yu9dBvLXs)GP^_+yFYDnviEPTwes z^@Rr_W}x7{+fW=}Z7P?a4;6X%S^*2{QCv{D{53J^F)Ep|4E*X!wR)@qkc2;rj&xxd zMaT%Q%NFI{L*w8E8_NQJF*xmihn*5aIkLAgeqnXK2yRdbR>WEO_<|?#xtiQS7Xnf+ zL3c(0UQK16LwY9O<{3(wbkZyEZeTUx`(Crcx5B&Po{P7b#k%@De#?3=5V$)62dsd< zW8xWQnas0sjTz;qdW8i5vYKj17&VSLB&MqWdW%19lfM$A!9iKL=J0uVrJwEjJA?iW zWv$_V0D7fP9pZ-QX9MW;%)~cHSoXd_Y)o2z$TfYh^KTu0{|yF!4^%YFe~Y}LKI`|q zRg&Y`&&2Jd=cb2cJEGqF=_D-9*?o8=By}RKx84QWLW7lovbt^n%-s731)EHIb;ait1V-TE*_RKOOx!gpFw zTkR1_dCbMe=&~Rf;LRPXqC(f^%NQhNV`wv#A+j!!u&obJw~~=VI~p|3OV;+~E$+4z z+9$v$?-I65gfxc*U`RJ6I{mq5+4ykQ{dtD^mwW+>d#kCR&w|~akn{(PCe3Iykn`1% zNygNJ?}dX^yX~x;%D2;lx{bN#&Q>Uu+YNN0mVWecDh|@#N4G>`p!gp7FbTW~HG!#! zB(ej{?4I&PrUo8R$*IsY8JXECRe4wQgRfimIp2idvwZFZB;Hb^LMmOA=beAff*o=P z4E18ZVrnmJ2zp$oQFcC1d#ZoP6E0n~-#*z=C4@D@D7)^8HmPg+E~V}hQ}(_TMIL5>q$& ze+{X9o*!;xji!|S7CYJxH}Xu?5(bgZjwFKwvaB8yQYvx)L6H?2WVi?$Ha$UCD(QOl zg^%AFck&R=iDdZrdQyUXoLy?3o!~3EhHORhQPsl-xBZCjUasV!Ug)pg za}zZm$+oHA@7k-&2o?_e^uMiacRlzFpFLe}y$qV(o_y6g-Wv6_PloqGt*%S|Y;6Vn z3*5v46gS-ag(U~BTW<(1gF}vZ*UO3g&L^&MZFSeu!Q73XTrX)sMZ_KqJ~jlMz0IDH z?Y7Jakm6JVBgi^{jZ98^#CFNv2dY)bxvK50Ht_K}EO}G)8*iBo6Q%4m^YijTAoP(3 z$}Nqi?1nO}EkTKt`dtdqsXc2wo`Kxasc>}yz@moIuo|h9o^#Av#{JtQ@}1gr5fhbE z`igAM5BFJ6AHU27E|(v?F*9L7CzH4%88<=MpW3oolrDW>3{_>*y63p#c3>A(Dpr-7 zI37F?cg5I%bwQdO=wDgoCC^Eo?&@{bSJk?DOV3dE)z#tQ{c!$%u^XjTCdK5I8HZ~K zP&vKsp?_GW_9=ySFxc-1t`z+6#T(RCKNV`Rx6Ls!GK?ybo+8!PcPdJEhD3IpN}srk zX!K;}%yM>mh9e!+vc6i}CsR4`un$lntDMszZ>XHOcF#T3f-J>C%=Yz#{2A(9gYaI!e`IqcJh!!y( z>nP=4G{YV!zE`M`FhuH#;Y|@a;>*L6B+Hr@f{=+UcEV!@^mvMO_P)*o1iInmp$Mf_IP9$o(=Q(2DWJ_n& zzQ(FRt87p@#Jf?TdBZ-mUs1K(+eSrN^D`QiR{r+F3?NO(t?tAe=Wys>C*?8dj%riP z^k;?_6k^kizh28R_Y0DuvXZ{#9oQl`iEt*f9TKF$huS0U+$nDbDCS|<0l*hbg8uUw z02fTDQeV#<@l{Fys$b5ZLv?>T&bfoB4J2TDuU1)XlLnxrCmAfzVvr%@vglRUT!Ac> zJjQsn9Hl!nbs>BURyd<2kte%s90V!~pcNs-RR0BuBLx!p2wGFcz4)u-(m7^jZRRwB z=7>NjBmz*m2q&yJ+WCZhK-)a>9FA;RA$Q!0!kBm*ss}Y=)sW-*5NJCA%94Mwat$B= zm=Kp*8D-2iE%N8KqJqB$DoiG?z&C1gfclU4L&600rE{Nl^*(nfLSq zwFCNe)2g(?gh8c zmaz74%2_GVZasPg)cW}iZ>k7F(vP(I6IFlzKHNp0yZsQrC~L||*{*|s zh5ELiK-Zo;Wx~WENmWSMTx?fcapMXF_y5XqStC}3%=32@BwV+smsW#FUa`mk@cfEx z8-_+SAhiGj+gPq7r6qhv ziv((+e05qeiB9TMSVgOx&zX=vY!Sb2Xb~Q-M(8#=(U!lmfkim3EGYPvvd-S1{vh{0Y-7L`dzi{xz^BEf8uq zN;>}wx7ciw=4cVBK%aKW=6$*u*lSY4n$^E0yH-I8s(!XZLabwF&CY0Ze`@_XY|D5B zY0N#gl=zBpVN9k?Nk%F4H#eK6aT#kPQJ8BZ->O53`tY(LA$|kqxa60vY^9<=?fc{$ z?s#rZSOZ%9wlfGR(ezoar@M+9G~Q!4EKeZoCYC<0qJuGMVtN(V)Yjlw!GEhi%Gn)I z@v8i@jB5HjxPZE28q{9!du6HsYFDHYxe}_L3leWpnh_q#1x(EYNU6@`+{i5NI=u|6 zc!9zcI`|)Qt`QtS5Uge2TNeCZlvOE@m}7y7zdu@-kXg`X-I;h>jLm>3;VPZEb_iq# zKC_zC0NB;P7ja$>M>k|*GAuz$q(sIfkE`w;ZUJv5bOGrd8+JcCy+e_|YHHQkb_*d8fl6$5zre$jR zf}Mj-3wn&MYFJB}Frp|JqQk+VR+3khlL+&~z+xbqsjy(|H}z-Co{CVYzq>G4RZ7h# zMN`U0Z(}pS;JMG{m%_o_z@jNSyufU8AY?)F6WY<-XZm`X(+iSnh_{2`vpEnweC+Lg zS@vJdJEw|wjdhG`bCh;sYcG<~`~F?wPn*9&{%FAxK})Bk6eNTOX6yzE_X?bc%;#tG zcq+AY#JvfSvfp9inZ`J{6sC1UQKN_ESfEK`c!-{Weh~^HTQ6FNO02(mN0y;s1p|Zj zebWJjZF5XWc(mZu(!Bw4_e(1H1-k8>W4 zeiKIl>H4=_ZEc7pI3tm~Ps&NppiqzgI@1&A)zRM%vLjjtd}VXOKM?=xxrQ*?wAmbq z>t0I+eCbmB$xVz?-pSVgewkOsGvv;-`UK#Z4Zn>0em+M6I=42fE3G>bUr_1MzHdeOdj*3>N#O>7W`lOP2P+1k#VLAIlZ)(uq@x}6hYt8 zCHe5Jc>|z(@!9IZ;_~TzQI^(9bda=_qjk3cYOIfrpeb}}%_5XL;4ky9Z%_X&{ytv$ z81&J!p8GAq37wA!tG>N8)ix@KsrwYp4(u%Mu^i(L6+h0e_)uHc&hFstndoeOj@eea zEjOWKl*lVc);e`Ih&_ZXs(1i|PX4YgwtDY5QvIn|DBMZi{)Y^`8-S8*C!0ScWzIL6Fi$U7#@d&qoM1YUHALl$rt-o0WrL6 zfzv-ua%ix0-gF{^u}4O>!T|zVE4OtIplo&Mq=%I1g|Ax@_^p=WcFsT>WbHCw%^Mn$l6A*M zYIrGk6Y+bK)#13=;`Ty+OS;(`0NC_edKySYOCGYaZK%HPbYxu3!5^(PLzg(hvIFG@ za=U&`*Mn!X4>|x`SS*ZE-Z03 
zn2fduEaoclcM}Z}%lhf3D|O}>Kz79@l$UiRVlTAnlc6fkVw4!rssc~}z?Qxe-0hLB zp#%f77eA3)_MPr{ZWM#0clwBdf>+ElZiy>`-@-nrA_MSD5jBYMvTT-5ti-@NpQG%H ze~plKuci|;54>m#2vf@TT?< zv4B9O(eQ#a0Y9eg@`B(12sknrlv;G|c`&)#5!oN?mTeQU$E$DCRN?UvIjXa6sz*Q1 zw^BEMpLr$aWL?}jQsi~m2(rSXXx4;)DxF~9WLo7c&Wv`F1x0%L1_%x70_G0tyS0De zQvdqhXxzwkTUR_i(^<&3BBdmxBlD@KsUn;!wQGRasf~NunAO<=sOE{J!n2uQHQzi0 zb}_8!8msIKsHtyuR3QgNXggyEXbF6*&fePZmbNiQYAx4orT$K=Scse!+pxK~2)2O0 zFy&Nl??hQoeGia%WjB-IZ5bu|O=7{dYO>bwxTS{*&iP232-ONA zGU^OVgKCC~y?!+TD5P&?_2`Pyl}{eYJcl+iXj^Zooq(Vh{gu>B{cW94Jw?it&X8I9 z?%|(wd-z^l8sPiuO)ly06wGo>n}Thj){f5cC3sa+>FNp%x$tCHx6Qw|R=#jk;mC&h zLP2@UC@rns0O@?j7C68rPcK*9Z)*5PL!IByB&W5l2s|I_09~VWl^{QM+@S0Glwp}m|;Y!$pYpWoyP?Y>)^;4NiM(d!`E_Eu}pv^738iB zm^D+JycqW#;m2a_hU)dr@5n@!Dfk|IDPo1?C(5l0K!v1szM)TettDN6al>W_6xHx6 zQemab5#v_0h^jFzbp{$=US49Np>aIeJI}b3k|Z8kjPH{5l2mwf{!J1rbY}X4J}^EF z0s@$4>CtG$!h!ekX&6yGLzKwkJk3gAAXVK@(DoC@%oL8r7=y$6h3TQwA*TIJAOh)t=Wd^546ktPO*D{Z0|i~?^~xe1 zBs78n9Yia>LJTnh3~DV^24{c}9M!=ajpscvIVM060=KiA2oKE^MXH#5S4L3^n&RJ< zh5{Qnad3u*?;z&PEicgZ*8-rsx@+qr;_tu=K&*IfKcjBxQ@QzN4TTtPK+sSj<;k#} z5mJ5|tn7*wtdJRs4Bn%r`l9!UzWd{Wd4xHPbcxEgwfe8dOq0&ful0ReiU_G*qY($zK@L5r+iYyp(8Q5-88 zsc0A57yBCSvU53@Vl3lZVX)zvEUYGKe@*gUZ)U*h1adgv0nR0LputrB5_Vq`5qjE* z6C>ysK$jDPy_38P;!0`l5|l~f0Sh%6z{;V?4b@SEsl2zM&qrAzNxZV{E!acBl;)R+aEpiVGA`3@skTvZayg zA9uBIY@g>5VyfL1iP zo2MNJ86a*k?HS5^Ztr5Q28q<mEq%LeN1EqzD<*iNY6#YohxQbXKEpbK#6Jh5xMC4ii~V^fL#Dp4rGQcAJ^J>cxSTs1KVWI( z2vBaJWJ-_joAAiAXZLH5AeA^K!ifM2KYv?g%65e=6LgUX!I@Pr4Uzz=hgn1+I+F8GJK z-cU|rp3+}b7uB;NDL4UOV|xN0g=m=b3$JTj$nLtd>cu`YrMcu<@lN^rZq{qOBLnd? zf{2C}+ny5F=^*wnL$wJdBGsre^@dd^AqOq|_8Shu_%VcYa0F_mSz(pR6P;i37{kM{>S&7ltDb~!0qaU{u{Nj!wg7%P-1{YmGZz_U*wzk;@GUY zyB*vnKAyxZbkA)OL=A%Tn41=Ay73DHnU(#sjZtw(>UPxssJCh6i0 z|2}FLrtQ`E_jp2QP^p2n&rp((N&Yf2p}nTVYkaG}eWO^Q>a(>pA%P#5V90-%aVNNm z4sN_JBPtarEsG?I9gcMjZW3$F#+jZH9mTtbIW{634I zp(z;R7y9caR#g>g8vs&|P2xdMwLaCU2nYJWqk}tlsIh#Cw8D*s5TS#UDoq`@%z#0V z76Z?io~NcD+Ev-rS-At~gN6?VyRMI)N{%t&PD@f_u*>_0%OvQ^_Wwq-GTCo^1ro>Dsd1=0EJ<+sPw%dH)48ma0AtxGz$HoAvY|4Ty5n;*7-+c>tWrZ zfINfNS~n5tKn8@<^HJ2j=>MVWoEj?u)@>Wxwr$(CZQDu5$%<{;9osfKwr$($oId;P zhkJiPy;Xfxv*s91{FRv2flWx34%WF8=FF~-)mru!n1A~-vge0rWDocNPO{Q`7R7vu zmT`uLLJFdTMf5|D#F=cyQP&YEHT-=v3@cUlV5P@AxyXQSLb>)6F>7QwEip3Ao0GDJ z{bII?Nthd%3Q6bGPU*y**gzqjj#>OHhkJ(T?;DV%drZr7o{C2F1hPYEtnvBhtz`Fq zm=nz3 z)?f1-du{+3A)N%oX^9HVUaY5(6|$(J(V6lM&wt%%zuwBdh4<;_O^>I`1OUafdReSH zSen5-9J@lz%jkG?vIK69$J3~ynuCx4hO!9!W~yS5uLc(fl^z-jy_zfSql;OaxVCY- z9Gn(-_sQUB2wBM4?K$uYTG&k|N;;0_MtmF!Nen>Gcd)VV+7CGHZj!?T?im_9x#d}H zy_!?6CUeefPboVLG_BAt5R%`cpDQV2rNoEeaLOm9sXkWlgg-3tC@2-*U8g*<;G%vJ zL0s(*6f19@6L+2NDLZ$F^o{K%S~A|Vl1s{wbLuqVmSj);J6Kb-lPM16S!(EKmSa@O z%me_$Zu(boD?j|m^lmdaD5cj|JiB_7`m}uwr0WC~$+E_`m&FkHbDNnV!sQL)bFtG} zZ%(q(W0=1x7=fh`7_)KvbcBiSp}x31-2NpW7H=)5dM8C(S^~durV?2ZO9qsa78Sm{ zG|AwWGyd~jDDnn|4-_z;n+r4l`z=^;0t5h#Bn%Y+jGWAY$}53RQ2_9J0FGl|T}+Ik zy-hMbkSMC~#_MkX$T8gC&z(+EaM)C}yXgpT4i(Nu8Wok4oswrrnM^f68VhE;zKiN%;MLB#1zZGP_-A%U5 z|Hq`Zk{ZqD+$)?1M}Q|mv97gWb5Y6yo<)KII<{YQ*PkkErmnqO^7%vp3u881h7mf2Fmgt{W`J2yLiYE$vy8xePQ2SZP=mG{n@Xcojo?ys(2GGmj+Q=oTO|V49W&qGQ z2&58{!PBAm{MMcLq40n-k@-8QH>cDJSVH&T!T7mOgNkrvGdI9O%8LIEK}d3Gmkp6| z;x(-_R=a!1H4)nG+Ruv}Jqv3AS6kPdUZ=;f@whZyD-gUc`S{~|YQPL9!Q+j|FvyDX zVeR%-;mwtfkD9Lm|L(L{+pb`>sXX?lg%;=3|{UwZEt5CSb$n@yCk6=)Y zC^~YkjQ`{6*m+m5qEe;bEp|aEk)%)aPLAK(W5#Ke=ic1iLP}H9#aO za>|nc;fr5pZr8P{FksyWC%~wt$qe+TeHt}5D5QD+n1B+2D=4_!;&aON8D8Yz;Nj}< z@_cVmSIX@VK?~s*rFdo4YPhCj-i2lfc~^5BpcE`M93C+MGE`%tH#y<$GuoqW)l###bK;X(Ap$lIcZIhX06#ho&U+o;H1Nz@}}uvWj>It zYTxu4fvn;5a2H#z-mBtc*6yCpZ=Dtlm^Dl)Y}R=RX``FLW}X)u>qjuh!n7z|i2k3f 
zkNrPiqCl8A{u92YgQ$Thg2>dTtEz#}1Hg9JbEJy*!4;ZO#_jy>Cmq=`hlXxZWI~$8 zcm2ek*}6LRG&u zh+P>-?%Ln`kCW%)e>>sTj)+9A7p6&s1oUe}NGBz? zN`x+}!O~bJsTSCNP8Z|zh5G(#8H*{+2RPA%NZFd}*hR6!toG`R$pxCB?45eCgt8=A z)e@XNojh#6f>s2%m}KHv02Ba?dlJV^#2{jkpB&g`GS7=j9zfurMa@sCRv8|hz94ok8+^i z>i5p2g7eleZQ|5w=K|X9Q9(^DR>I0QTI#IzjYMw}+5B}FphivP1u)&R%YvNqhx{X# z6`K7#DtY94Zd>lf57H?R?yg?k*AWjT3O;I91`^$YAtuF|_SdgX&)km*7nsllaX`+; z*qvYqV+vTG8Y-3V4S8simlm8*LR*A*>^PC?QaD9eE=CsTaDuna?<^Qm!S0JdiqN4m zAiN;7u^#@+0a}Rd3!n+zpt)8*VCRc~u)6N4D7d5F({T4><*boXf zN_%pmt^ujx*%3NK+C74`_thLAvYev7BeSzd|7)1a55`gW;>ybtmR7{avua%wYkyk6 z@jC*Gt+P;MA;1w|Z8H$8I<#37adCy3L~&uX0Z=?Xg=0ze2xvfqJ?_JjSSIOO%k-oP zz22vGVCPeu&&1io0&7RfC0dizkNC;0?9xM2TPFHE9e-XBhCqv5c;9T}d~&KyN3r2Q znQ5Z8oAtA)<#a|2hX-q?s&Z->6|!Aks;w1%>)4DgRkiXykh)%I6U4g23zC7R~i{bQ12F2yR_)2 z>km&@Qd^O-spLyqLr)xL5JTxo^CAogB$0ViHmfd|0{qd;AA$l9F-=Z5L2kwv_}D|{ zgOi-Bk0o*voMAawESFM2MnuR;ymxp~(IK~Ni4imo0r=1qgrZU{4~%VZP`%2@V3CW7 zsMFT%l4nIN;jngnuRJA+=57;_cx+J)9_8Ct$eI&FJn^CMiQ_2uwUxFL>${31laaLd ziAN;hE6QX7mN+Me)aUw=Lx8fM$eUHRG~xA}NrXHL0s@z#g=7yRBGe;o5z=Gk!WXe3 z9t2zh0r41vLZTO%!Bb&@)svSiZc|tpik`YEkuw8)1g=kbGp=j%)TT2BK*OwsD=qkM z3F?d=^&vHuOq%h7ITZ`m?OTe`=RA*;W}aPC9G5GPUY`Ka>#x?c2+DTwtRZ;PLB7`g zf)E(wz6RS?`{W_YL3f-CPR|l5a0F!DJsVRvKyCG^)sV8U5-t-$rD!UecBAKTTLRr2 z9P5y}E~zRW2NVlzL*We2UZHJ`bsX1p3!()AAv}4RkhJNv3A1598da^x#@fh04I(i@ zcK_4+BUAsYktq&CY{B3rl3MKPb-@G7!C{L4q2>%S_J_-7@-D5XtD$cW zpjOC|EIe2CdSNVl%o8nAv(C-_HLFQ75m=nzaCOO5T+^fWkOA6vYnuHwrh>!)VBd7` zm|pn(@_o5li2f?LFa9G~btPIj7Qd+}`_+cd1V5cdh`*-mFbuHZk2u(&hx=+2zdx-@ zvA>82T(TMfBXDn!;Gai`srO=~?=V{c+!_~ypli3#e<37{iP`{1L2nA8w*qj~kh=s~2{()q3B$R>i6#ym%@>UXsw$}=+Vq6x zRxOo9tOOUo6>tF6?h8NN7mc7fyK^CG!+PoC5$i2*6XGjEHhc?qr~a||eULB@V2gJ1 z^KkWhE@He8k|Ff3k-Ir)5aBKPuAp;Vf-jSNG|8SF>=Ta>?js(lk9}P)qnVuCHZ>y? zsJ&s565U*tfruhrFg7eBF^StBFGv{Xm>~8N<5ugw+O>6pVbwZK-LpPUAc0r`wa?U% zAY=?LsOO!0${A{oX)7~C(_S$Iz!%0`V<1ZKcWfBr(aJvl83pkih_7?UN+F7Bc+}v9 zm&{)N*VGU8o1Y&Hv=G9c(Y)rv?_aFKFKrRDx~y{FV*m?lI4K;3)>ou6aqEh(#(+0u ztBTOj>B~NOR4+^8R&()WR8)9IELxoBF(n=*K9|+{&~b0J>w(RAV+8sHiDv!38G-x1 zSI_^>2xEF63c%3m82TW3fH)9>1~wUP9vzq3jF?3uYBp75!8n(S5STtOnvd%}TmtSM z&)}W@p`AZ3 z{v!7)?Kndk7f?Pf7RFvOktNnvL8WGXO#TE3v-5iqGF^rzoqpP`QCk5;j|tRPaQOY> z^>RXB3RoSQl?H!tJ4bjR1Vz2 zb{*BSO;;l!Hwmn9zHO;aNb=N1 z0Vbxx1lTV?IxKZE8&mvar>ABDvzm_zy4|9`vd*hGGS-DDw0|Gn2*AwLarc#|Qqw^3m}f(9gH2P<|x7 zEyi*~0kr?yFm%%r-B>eF5|_XIA~@aC!Lwz85#WshKAa%?jTdKLZJIXfokiMY9}F5$ zhM(BtIM3T_)<~v+^dC=w`;>(-qtXl1<&xg z2YW(1mODfRK=GOqL_E(px%QjwU~l~x6XqEn9O&2;N9Go;2A!-E4!=UE$eWiv_FDnI z9I!N7@R91-U5>|S(>C^IdW-3)$CklJa|1gAB z_U3frUNVB?zqn8b|8s(~+>iDav0FY20&pX{JN+#uuQ#XAlY4|{=bIGA@s~jJIqayr zAz<6lNh0{{G2UjY?B+w7p>2JetqRZ~H`;EXgx_SoFW7g+`Vt5R_ue@iKuDe4+S|0c zhOs;Xd^G|_t4YdIadG1_hp9S;TDIFn18R_qF;zL5FzW4nv;fy)K!kV0+3G1i2Jq@4 z?4)0EGc?*fQG`g4T!pyqc%9cwxfJSOJ>CE)4E6r{QR;77cW9$eAFKN(aK*$;9JH^v zoIBs1c^+g6^a&<450nDm7>fiY$-%36MV~1dTZ2xmXq}5Vt;oFi^-9C-lx7M>V4%S! 
z%a0A^wW71SxOdL|!w5dsdwa(@q8HC22JYj1MlPu-aHsa+?61ERE$(y}__=6CyuRX` zS{x<`$h4!|Vy;@34P&00QMkdXQoP;GuhD2q2RUPa7j(liU4SP0TLubk!|usYw#=l z(~$@k1Ju!PABX{!#MjLJl@P52246CWN%Z}hr@a~ZTYtUF79Gh*Tkj@;)%C=QtHfq8 zLByWCAdxnl4tf`yP`nN708K|G5St`2h$(c-B>@CGMw#TvRf#rwL#O^~7DLA2Elo-L zPG6K3{ek{S>>q=X(S_xS!fHBGbchmFSToH3AOD=Gbfd`p57rV&={Q6W7=B0lft?i= z-DBQGWg*9BUm9_`9=ADMIrI-5QeqLLtFkogs$|T$ZmkuS1Tr56D;rUDSmBQ8c(}+Y z8oxsndwL+HI17%aI`I||273#51Gob8U_f$myOVHn2LKLy2EKQQt2B%o4Bldj7#$w0 zumJ)iU!9n4#K*;G+I*g945B!ziV7}UUz~pW8h)!&b8dOc&^bY^r%>2sgjN_Tvb&5V z4XA5?3wc3&5lt+1dz$+-{Lz}7`yM<@)8Rc=n{OvSM| zEK^qr7!U@JRRNM_5dy)6D)sLHkqT#&Ay|0?I)KS4vK{_8riNEP3B>*xU4-G z>`i9o4)#OFz10(uTbBlEb|y_n)LF5@xE-hu0?-xBrTdktt)G)amv;O2+Y&vkZuh4} zhtAMpMYHO8nLI7qn7&t=Znv6&a}|?NGkqK8s?%TB@8j$5$cZhN?Ae{T22L&`(+K!z zn2syu>y0g)v!0`7Ja4ym80n_A`S&&y*RK~ z7w|7&qRYt%Ii~iJdz`LUH9EI_c>z8%x!&a~4?nztCn?oR{~Pe0R>#OexLaF0i0$O_ z3n%SX!?13UQ$0AKHRUs)H8a2)Wy_iO7Tit9Q?MX-PzEJetT;rQo+mvX?z=I%x9pSo ztj!Jg_vz&`glg_D-N?4J;^?Os0D z*ToLpM`Qz{Hi=W_WssCaXmt_-?&F9j#D-IR7ssK8vgs~)pgTc51Hhdi+)wvEhe*br zWcZ1{mEBj;CWe0sG3cOFA2TsNzUExhQ&8O544ilGerGxeB%Hu1j-RO=IG2fT3V`ZO zqd$t@!R%`DdrdGgx-_v z<5$>e;$UXcT!XTh4;pcvSfYX-5Ey{GPkv z8?XZ+G;OMt*U^;TfISX%%mEVmexf)03_tHTTDKt<1e`${0u;Z+H!*g`AzFKQjaLXoR5- zZXCdKhG~>w`d@P~J+$&)Cueydf8k}9PL5F;AxW|tz_-Z5DSeR=9N$`)L@L=wNJvgc zJ*G0z0K6eMg*6MlRElRIM*+k`MhD(~N;=e}ggBcty02}<`M4hY4fyLiO3X?)!EB9< z;na5j9+{&Ij00n3Z_c&8^}A>2n9l)6RTQKa1@<4h7_G-aS#6X2JlNt1X1L2UPG>@Q zP>~+#H8l`1*Edn)`K8m8WA@|qLY(scN`UC4*~S?**JsAwm}zw}>oc#lBX5N*kn-0V zB{rN%sq>&+42$tCwl3hu`2ZlV+>-x5I}70^Gf@YsofExl(ds-{o@lc0EqQ;-DbwX4 zf7B<7$joLc*MW-4rT;ungx9H8`@N`4uS)trSeSKG`2Po8|FNoZ|HqgL%FM=^-rx*M zlb&u0LJJUEPcxfH!d;%G5QtO=8`}3}CFCY+cwHab@j^V^9jJf(z&|8VjzAgc*^iZk zlEBGQaT4oz2s6E1!2AwCqWO9>_w?X>JG?3)WFr3(yxo)ivcjfF9^tz&gx~G+mqa@g zl7Pyp95N6hFVfKxAKHZK#5$NQ4j*nkurl)*Vl=;sFOJW+^I|{`Ef!G!Tl3x0-mq&{ zL&jRB=vxm3_rzEo1+E32yQ0x^0f|Q|m@^8yY0DFtY-P8Z>}BI-L%tv3YF*O|s&r1d z|IB7f^Rq#d%b*f`qr6e`x>^6~7%${_g&ePxUSCyt16i|LtDauf1sI!ug$cS@4^g8=~Y% zXS|L(ubavgDb0-D`EnsC3|?Q>*Xdkp$kg_7m>oYta%2U?F?SS&EMm=;Ho~a~S^99I z0t&N++Z0zTlLZHu9q#va025jJ`rv&FJf7jF+9%RUv^g+Jo=DNRx-H*L4V>j3ez6k* zr1_v}fxDtUC>)At~z><{)^Mh-AJ z?A1}SRVJna<;UCnQ((LhVFpo67Y$BFVf`r;TC9>ClT4Znv%)kHm3uAmLu6t7ihg5jB^EaOvpn-+WLt&1~_dSB5@*u@FF+m ziAega1qeJK2Z{h9pXm1xSDtSkr>Zc+Ymc0gd=v;O-*^dCxFU5zKA!9KM{BIDcCoE` zvF-GsKCQ7MDevBT08Aj8sri8nzLTJ|1FSPHpy8B+Gy1nJm8wa}kN(rjJWr_gJ&d@9 zrFy}mMxd&!efObnr6G@)slfb+Z&IxP{`T9kB-s!kFPV0R{E9?fQx+1`<3I%2yCn6n z?21^{Q=kQ$ra)#jq)DQ3Wa0B59nyJtc|dv$>D;Dk>1XQt7j;BeNn5M($Cgxj$tk$L zGXsazzQ{4bj=iJvi?RbnT0V97WOwFv$mJEq7XJ2ZO+;PE0gko`_b=MvKe5^6FrfG@ z3hh4tvVMJk+a&U_e;R38pUFeu`O>gmRbNpA5_>Jl^(i*-W~XON$;I+%YCYoYM}~D2 zb6IAZ7axP_&wiF>7>V0Ibq_k!dRgtqwCQ}gi!-8K! 
zw3AldIO5aYNt5QoNpy4-Xa@1!0nsH{L}t$bVb61wAtZ|~Offvn9cXQGVBw_WI+!2l zE(i?hI-=zpg?&s-xpW1=ohML|WsA8e9#UCMRn5z2$uO)sq^m(THH({ru+C%@R|aJ$8T|<_`lX!T_t-nPfMXy40&Z?dqNnH4S=wIrqID@t9a0^%+t!e$oczSB9 zes|Btx%F!MzJ2WDFfK7^_r)Bhffwz)FWcnJ`P?Ut2qaI{k=?p3ZPBSBLb=&7f7ck> zaqePWdF9yylQ3uGBdT$k%UYt9Nq=3I)JpI)A83|Yfuq89f6>?X@Z3wU^>@VG9p)%&BfRP{LuME0zaDiU_Cy>xXg{JtmHl0YKKUAiX5m7b}nvFrz8fQz~2 zw-&;P(&1a~#y|YB7}jsCVW*h8`wm_VkOU zsiQ%RBayvlGQnoqq5U`4)CSZ|v-9CVX?3h{Cno&06h=hTXoQtc_WXEMK#`CW5UQ6hs(O42xTB!9b0L#4py6480xlE4MeLeBD0ip^ptZzy6=HeojyU){==hYezQiEfM+mIeKkBC2dhif|M zAY$VxN_J@08duM$K>0v*2IIc^o{5SX(6`o9pMGiS3RmZ2H1jVqN*Y7#vpi}%aep0; zgZ7J_2G5v-$e46{9|HJ+Fmk;ICuh|I89}JcJgl(ejA|}lp33_Q0CW$-zIEkWK%RU> zKTOrYiAL}*4E1hhs^)GF_G03z%ZUL{ZvBYZQ9P7rJlGri5x~S??(MPNk!@?2?g2mj z3ilX}d8Fi4Q+iw?)pAfDs~RJPOkm#8bsVAT+5OX}mOs6db0l zDXL4H`Vp~=Zw(ga7Q?ud%DVva&|sj|hk5QWwu}HQo!Vm*IKkdkcTf+1NU1#nCrttt z8D=Lbns5%=jvjvLtYwU4*kqne9GQu9|KY?CHUTDDDZtO%Y?2SMd73_NahUu;oFaq|X6Q+^XQ6C|ykt5_&U<@YMK5h8=(egdGtN8Ej@RQ%#sOw}C zB6#;@o&e)-N_Xkre;Zq~5sa6P_$pC%IVRgGEYTba=F^iz!C}1YQ*a!#sy+~^5{w{RRtQJpWd|jBB)VzqR0h|O~8HfqZ zW4-I_v>Kn1dni>#)a`ptK=qQnijqmT^I>IhJAla0-?&L*&tHTjLX6;FKRHrDzZDXW zxG>OEhvp)A_NglPzMt z9vO_(@E^$4hJ;5{o`Le&27 z%gb3Y=}5gPB3WswYgxezNNLp6Hz?qHuV6Ne@>HgV^SZf#Fib|+&Lk5WR8^!pqa{Tm zqD;^W=PS#Ln32#YF%dH6I?gHGW#dd6Ln^+`HzeQO8mgn@+M_V8vTcF-lU11Ii&O*LTb-WtW$Zsvf4rDN|)a&b<{91^cy3&n{ujjk#w;Z6e znI8nqLQz@o@asms{4SmekB&fKV#M5@^^$R=o)3L8_Xc;!x&9@Fw9L*AgKA+)!w zMJVc|q&vd$wfGW>XiP&H9N6{8IsifEj&S*DHE6h7X6Z-jfgN*fy}VdV1>3i+SwA(M zTbUb~#!6{AVjPN0FT|Z&EuaFWpbze2N^TC`33wmWoyD$t^}tb`v~;^V7z z3BcTVqltWa{n4M=#p5i(SJNBt$QQnzirfd&X~dKosby)9iK0vaC)#f#0d0O=iQya4FEw{(XDS7eBjny>SEoG+;Jr>9() z44+w3uhSPjDRQ+?t5GAX70*#n9JEJCcXNSl&pn~NFVoy-&hB@-S?7?_EyFwGW9*Z+ ziVnF94&fv!)RHIFBr<4Ocw~JG_1nDNFKF~zVY>eeN1#kB{}*iiuPhV&-<#&Pw)Q{p z0Mfq^Y3|S=(cCjK1?Fjh0|it(rs%E+JP9I<&f1i?vvO#x@AhuaMl+3b%M2FHFNuxJ zviF?}FNyTZ@4nvS*FO)WiQmf$ev4d-2y@}8^W8=f*l&#wbN9lH;fo5o7J zbXuGQalF)%V*~=1L>+7OB|ktq#R=_r7Te8(<>PZZS#)(5C*k(DyVzoheNC#Gc~Yuj zOm{>lg%8UuqO5tL^*|@G+3~~Uhh{(@37#_poJdWwaznd-p-9H#@SA< z1*N|-N24>~#&WFc_16HBlvB9f;iP&!nw<(gYhXS@9+Tb}aNUTQ<(E|ctJ zs^Tz1*Fc*7=-NSL5kyn>Yyn|Z=?(5 ztHX>x=X}Q)tTCPh(k!CLF!N^dDM!>{uTbdk$Qy)YynUz1(=lucQ8IBRw>NVINQwT7 zxxc}LOwXE>ywK=+PN5#;R1cyv+_a4tY|^3g4-ve>mICc#M# z6jG}1nUi2~aG}wa6ThM;Q5#1iHYPYqPN(a)a~e5}Y!u!|QU-MT53Xk2sQDgJ`jwI% z1e(ig$gQSECy|tiS5fK>tC&{1;4eV`j0$_(x$3Hz^Q*$?D}sfyIMryvF2jk>6uA;E z@d{%cku?5YOgJFo{Yos)ImV1hyVJujjdCJcNlUTz*}w`%4-ZPo$!)Nn1L2N)a|j+` zK=QD`VujysyJiqrZ+f2zAI~@e^id{bCqt37E`u~bNm|CAhAm&~Z4?>F7q>0`Y<`Ph zj-J0=`yD6rJ0ek@EKsaQo8E5vyN8CXJn+p%Vh6% zKPo8D*`^`K)(0}c_BR@7CfO_;j0^b?d0_1XM95b#ATcN%XbT$&QUoRsBpNjoZ3L=m zZ_c4aAG$=a3BJP<#PXI%IS?VMiel9{wB+h|qE+YSpCCBxh7;6{H%yeLkouBw9^=_|NZw{C8}?a^9VTBe_R z7RF;>Hen+%zo3TgLF3{$m0l|*Y`VGH8ncE~Ug@mc@};FNJ36IGm&9Uz|1gxVkrh$h6phpPsHH!ybbWt zDo1$z@C zeJ7(hl78n zlb4T06Inoup`(9}nTkcHqkE>Cia{4qK`UaPqGLA|?cVFu6Kw}dkVdlj&mtg9tm${G zAZY16tf2pU=a+RoYDMn7{kJKkT3PiH_Da*pI#1M=*r=xlUvC6)VZoy!aEm7-zU-b} zIs}NuQF(Dv0>w&zTKRs9)O+oKWrr&U+B*^oemfDKWHUWYvc+=5f68N_`=g9z)5(8P zjdR9#O*`($`?Sa>O;v4HQN2QJXMOZ7Es78{W0wQceJr|Rdc5aYc&JF>6g71ZR%+%9 zKfJN}&qjxY;RoNUb+>8%bfJB1RV@O`#=toz2on7Cu7`Exmy&M`L-ygltY%k{JP-gn zI(i^n6%M>epDJ%Wal||4Dw4f=I80fg8d|GejvTo3?smqtYIn0co4)>;OX*E-{0Iue z=?MX>S_F5G($5kG9bZ{%DwSzXV&C17NWTDG^w1Db?p|5E?IY*@ns5)+Ds|PsMw? 
zr}}Xs9usEqPZPs^_6xU`BL=yMFvzECZ+-#TtMw_SNCjg=JO+#Xd)mVFx)7MdQJX+5 z`!rM2V!^b!T^HThz7HtyAc^H1(t+-ro4a$dv8S9<#-EAy;TQ`MbLVL7h*PlLtOhGG z=Pn-&Y|H)lmL$@u7AtN`%ZQ@R5D&fU`Q-hoAxGk0Nh5RUaA;&sXf`0`EJfNLAH@Lf zAN){{(d_po@SWm({DC+hALyJkvUN5o%sTGfzR^WbkALX1E~zF!>TI*?7FZ9@$u7>% z8RSe{6aLoX=HvLu=6bI@*FYmqmCs zisyed{$b5^iZK0iwFCW$qYW}<8>+#c$62woZk7fCarfrT-4S58n#b}uN(4cy`nES1L`$FOMO(X{i!$S?Cv? z<*wN_E0P~*SNpz15>zh(L&stii%~|FWCX=XZz+0Okew^{z#G6~4DpSxK;M5@_$RZe z1VO`{QaBG5l&%#=qTt5Vj?B1Y7(%Ka!wbyjIAQon#Z{=NLzrsffyx@r04SUSfBvpK zo@`}w*B}C~(y9V3-MbrXYNQZA3~;OUiOk}9oOB9;uciq;1&|eA{25fyiolk>UP`Bm z%ovK-g-_$r6#hNc?Tu3RpBq^hlFXAE!nq3WGF&qjnXESQ+~C zlk4C#Gr#P;4g70?@$idfZ<4|ebji9)m`AcEC%$+;?~VYoA>4D=u@TpDQnI*45vuU4 zS}x7Q3kMQf{fA)M2$Xm-nyluq?AS??VrZbox__f7sv-KvSudHWCDvrP<%hO@V0IYo zCx4x8Po5VD5p6=Sto;K(!CN|qUmMzy!ACIimit6#HwXjTw-H8QE+QT%HA5GRK_R0J z;{lO%tH80^h)b__p&*e7OAyTGSM5X3f@B1YEyNIt`_X}(Cr%>=W=@%g8@#7tbm4Jm zbb(JPl{uNxU$|k}6Q>1mNlMkw zAr9AoKrE|eO?R6F>WRAakWurk4raWVlKOF=(zwK##2SXuwM2^oDNmgxiQ!P2CZZB8 z!!k5JeU#^&<2M18a9EG5O^rg}VwSRKOD&bDXIStoWO6#*NV0@T4s{~Zo=l6c)wZ*Q zt8&plT!8piiCBH)ozkEuD%gY#SR3(T44q(H?uc`e9{J@dT_py^ilL#Ly&yp36@>bA zbdyphWdkGiWdH1sUO|R4;!UFbC8*Uy^y_C#DdI zoCY(xz20Mj?uLC}WL z5|Y4RZMFbX90M!KpYC#8Ek|cQ zM~kw&tf5W1sR3<5{beC_VSo0qM{ts1&{O8UTD=GGo*(gbbwxppDE`fRK=PxP`DaxT zuk_RrMnyDZHlGH8yd4R5b$>|#Rvh(gOWO5uHvc#1_U15nEz8V~^rWAQb?7VOU;d&z zdAi+ppcFM}G(TH`w0!$totb|eTXxoSd93mMD+Ev`_*y6MVNz_x?_2Q%#$X=ilg!Qa zf-sT*rl5xCsRKFD`ik>^tLW-?u^mZ+a_Ggy8LrXb^<~#XBl$0q+T%YCc1;n&THC&|QN5Cj zF=X8a_9{z$8k6m8`dzFM;^Eb`YTxPAA*XTxia+~B@R>6l#{r_*KxMnhK#DSRvHlwN zzcpF6*Q?c%0ZnxK? z6ztuZBlkQDC3h>7_LCj4`BT_g8U04@?WEeT606hy)CRnQLdms~AT4APEBkh{1e6ax33sz@o?96%7GRw1e9R_0^GM2; zobGuEKQ3)1Exy(r)B$EVEX_S9Q6U;Oe_H^hjp;qcwH3B&0-+-B%BV}|3a{{;Cr1OPalh%*4^YmCrv(w0GG< zM?xc=DbM7-3SfQQAK=Bwfxff0`0`sW*2s(Ez#$=xNm~U%B9E67uC?R7Zp}Q@LgoJ0 zG(s~G(xA|_(L6W_S`>}6HJ%6!2@@eoTDg617s6f|p05EG_0;vc5=A>)Qa1UZ*f64h zbrphFEAh4WSyS_H9L3Z<{%Ke&{!u9Y5&V_uY3tE$lwXcu0^lzs?VOr2op1YXN|ao&`PNHC>D%ZjC%#il)BT~BPmNgJJyx& zC^$zWgS7dx<=~pf_vPyoewdOm1b$R>m9R0fya$XB%atx|`Po3=jlgiQY^(_pno>!Saums|;BU!y8r&PmyP5%6l8kl%cKDw-rV*-7ST^!&`{j zEjqK?5bV#8Ku@t4)c#I?d~%VXg;&J7e;r61Ks1n?4rsF-T*yM#-nvaCY4ojqMmNVA zDM_TYs)vEdp-!+~y9#P^3vYz+U|+fE34v*J&Jf{+XH6^yK%hGhM2XPVT)(Wo9&sNr z?KKb?-f~c!>*~0GcFaTH67qYwwNy6xDblLrdUm~4KI8>Q!&-KDM(sYd>$(Oi_Cu>E zfb$iJ0bHyxh=|3}ZWPR8nIXzHUEjyBG`iuOY(-5k=ZVmfhn=jUP5Q6x+bc@Kd~YlU z(z4XPU`8?NCjGJY2W*<7y7;k1=aSCpfE5HCp*Nif3U}L@c{-#el&u_)+6BN|z_J+G zfUW}$bETGe^K3?4TT`(}4n-D|oVMHt>t1!)TZy9w` zkX%ibzc94+rb=+=7Jw{5y7&3d;7D|U+C=E*P{$}eT+pv0+n08$4|17f3{B43?iH8| zr}?&uV&_6xbw^|skLNlVt1vC7X@`%>+H5h=Q9#X7@fjy;W!w`j-^r7~K5x)Bf_$=14Bw4niM#MwUGVuz0((W%VX|9F zXHaAbDxeZ?fEH$%1;wm?2zDwnadc)D;2VFy!{-)mX}68kd^9gqKcm32RnFh$1`1=BEr!p!|-$Ci!g*mky!nYZsme{4mw1DDa+dD&COw#VTm0<`nfVB$`ml|MZp-($K;DtY!w4XCrwT&SRClQKe@{oA!D!wHyOelCWP z=mRH{Z0q4w-ISgSwc>9SIa)>6dq~h5cQ0C_WiDJkRS zUO~Sn<>V`)L|2UGca}2-_T*H^`tCgS*DvCKCgVjx>?r}%gTGQtiwV-XI&vM#Tz?s9 zK>STD!|GL}Ccam@C5m%@Ji~PApOR+X-LdAWheqx=;g`elzOHzF4QyfCVuZ0^h?;{n z{n7ezg9~fNIGj*Gqb>|$2F%y@GL#8nI1eNtv9E}YXLgeX?%>p`C7<5K{t9ffoHE?F zc}<8DmVlk;X*8~+S%#Vs7beuYN^!Oj!r6&>9Ja(}3LhzEOyx(^3>R_GCDXYf(ASF9 z{z|^4h+^%#m1Gwy*6jqNBH z^mfzP9(~;!#KJ`}5ki>in5I4*)1rV7RCN}_O5)#XR?dADabyb!_8g?|beT=Rd4kRrS>K&N;jSe3a%P0@J7v z(ivGrT=IKRk?j}GlV|r=`&T)$EASy8qC&#ut7^QD{cT0^!6H6c0A8|Y^|=Kb-Ma#D z=FiZ%$-UGahvMUGe7S(-ChqtG>;P|bzFxo9>Q@B_SpKt;Z>BkUgsJQLbzg3NBUAm+ z(KQY%P3N#O8_@&QV!&db-Wyu!*i~pppG7p#+Wnf{5{EeOx|_NM>?|+)6kDGqv!uo^ zOj6J0s=tv3GG2^jaa=K{rGeey2t|$jZ)J8TN|8D-b($dz@>ze76+9na6P$>x7kl@w zG`Bvy-HXXW55e{a;ZOq^$^RxnY>5susG!W;Z2uptyy3VZf%4PX-X%x=7e+b>&g$oy 
zOfwg*pV^SO&hl)Ip9%{h1d9yK1k~2<{rN+hufbE%NH&B*m^~HnDd{N1?S5Xm?`pN| z?qbliOtovfDI?O8I<8(OC>DSQ-5V1^qDX3$>56Uit9vb7vP-oyV@oKGH%n112b)gY_9lr)i=DXR6wGylrf_&L@B3gb+ zSeZJhNsD+eg|zsr8W6YC6Y`NnRA6jy*j9WP`=`DJR2sEpJMnDTWq7xGu(f?cbDIiM zkqgYC%=!fp^BzHlv6^#eLKd2A3lBTr3Gv1@17KGyjBVM=(!(^wCBwEX#W^1lQSKXMwWhE%z^XKPkht=-c`4bQpmbk2@C{wkUl;=$KVA6{HV z{l|7Et3j#W{afc-RZEeBDLV{rP?69(Bno*T%$RvRO(2^J=F?QIk#PqsKNJv9@EF)z};wuq1JRC4h*?wrbqIFtQtyX%YkR-!Ail4syMq?@c# z`5p@rK5toi{jA$TJ65WK$*8cefi-K9W&w;#Z!A9Nck{WKu%rZDk>O5Cg{hnNy=?*0(W{)CMilzl_% z`jB-09fYnN`ww~PS5^5jTGb-2{8kkkx&o|lHFf2pQbnq~b#3cY7n;F2$ya|ka?A_W zAw5_k3qqC<HMN!yNnq2Bz9-lmpi0S@91Tx6{;nJ&F7A~$tdAid$<3BV;#~;tezq;%t`XMx za$>vP6@a?|K(gkU{H1r40!pV~Z>n$0QOw;-lvR^eI9+6IAlCj%DGWzKCq!@^q3Vn6 zEbRP)`W$y3HvrHzc$k*MK~HGoq@Fe3{K275JKw@sV%$)X(@;_6rLVhTg7(zj>=MSu z9@}xD4cpOX-B5w$?=#&0TJn)Wd3*ekx!0Oe!%*|)R#ne86+vM~pew$*^64krI2WpN zCOH6q?2FZCN&fs+203C*X^_IC2fwd_MM#V_c#+wAahyL!R5&aA}w?Y4A6* zQ-!eGWMGD;`z(Ee@$2mM^OK!*mtO$+S>SWg6%4+)S=5|N0Hde$g|CL~!aA>Vw{inmtfU5r-Y(LeL#hqSqp zbxTqDX=$e4@4%X1e+hYZoGY^lrq%k03HOAkmG0yX260#MuvrX763l zgqlxR{LW6%*r-OgJ65~Vk1TT>`2=RWbLwXxG^^T_jFv(sV} zc>?FP#aDA-B3$pEBPp*$SaeDvekg#K>K>J4ACs8yk^=3|to> z0ANJ-No|@oV@z=#tz^ueFARv^FE2hwEa?<|*~IjqaU-N!mjRiPfcRgg4t&c|vg{g{ z8$PcI&bYavG#DV&OP3qThmqb(SLf#G%`Ogr=A?R^q&A<;<{{%}BWFZP2%OwtCyW4V zG)+3`!SY6+zhTk%|JYQ!O-n?^gd`KF0ob{0abWXGPZVcq4(^VO3;rc9`KEsD;xgba z1b=drVgi6)PsMUH6!=swm_^Z{ON8&f`+}SlVX~N{v@3!+97uOp@%AM`Q={)(|?05y3JVobb7nO1FrE7 zpwP4fU{$!vkwcs<41?m}`+XM`5&@a3O(Oor86wWEI%rFKtKF-Fy6&^nt-!A(x{Ds5 z`I|3?wNL5`?THA*$@T&~{=83Lvl#C*8{84$WnYs^^~H0Ni!%b49Y9Lvv-!CCqi}3tT8n@7*_-Zt9B8{M3ma zeI0Tduk{JbR8;-hZ@}D>BvJ+HopKf|%us`R_D)5qtj3MQ4KBx>(g?w(h6f5RLM;<;k$iOAbIpy+Xcxt{EJ?ztfs7 zttcyB+(hp)o!qSD;syK=;5tsrh$;#W9^o(^+M#@@pt`YwJ8i~GRU(c=SKlG5$MaUN{yRa>CGmNmcl%Kvolz(d29JF;kp`qq zyKn-9yR)&;^;I%;K*SV5Y?4tN+OE15P>}pfzweHnM5(_{*`O&e!mlmRHnslis{FG_ zG@Hiee9MmD$%hRIm=v+D0)5%+DR}lWSqD1I+&eF+cJm@rCM;-L8nx=Fa~4BPBF^7V zZ?l%rRf?8l36ufBo{oo4kB>*Jc6<_9*5#qxq^jU|GE5!e0BrXS&4(=51SR0Qm}%yq z1iIv|u^9N|j;2c4I;)2Hn(@x9t7yY29PI-{Wdt8a{FE}sk^^bev~5_mB$OdBp7QN$ zppr|b1~3T?GBHie!#snr(hTA}U*t#WxoP!4cfC3bM@+##XFk@_7JR^;n!!GqZ%!OT zXsQgA@@2Xu01IXrqy`i!8YDfqYPK<)c%|hqwyXt2oHl0$3uz`v?Qy89DvWU$97_;cz9j z$}{J&2aE~zQHm8X-B9}NF4AO8@+5!$e8Tx}u!&-&0xam&lGE*%Kuqc_Wd-k2EvsKu ziw)qer6%c)0;zjxdZ`wn2B^DM%^@WFVCMIQDhnJmi9vHEnI7-Ui6zN) zaU|;>nSsIegBgAJZl?RP^KbB32@YR==b|xaku6HA`-_zi=F<{*V9zbuSL_QlCk8U5 z>M+1@0Kj-r4(mr|tli9F z&&8%{P(Q2!QToTgbAbUNUB>IjemC#^=jKE&0l>o7U>P`se`Z?>*683wxBhV)C;YV} z|8?h+jo%{syN1kvpEdl2ld#q<59b~Y#K-U;3@zMV+32!w>T0pc1_s@>BWBV4wiAjJ zBZQPskduHyw9Zu%2!RDf3N-HTG$;0=$U1&hC;t@;<)uk7HdY^GBQn~Yh2@AW>|8t$ z384HA6rM`i_AOuI+s2|T+MI2ffE)Kx`A;qX70+e;MyRJl>r|yeaux;y)~7nIks6Hx z3BV60j_Y3&Cfh(?`><8KQL-q6GKdg!#Qabwe0&_k4SIh~3+X-Z@uRwhDk?_>pP_CH z#6Q$#fYu(J%rp#mnwD@-pGL8qZi(A z=7L2whplx$5}ER-KNhBAU5Bug`U0dW*(&R8Yi-YbO+Cf640=Lzh{W4zD2_-$;zD97 z^sJj+WUF{}V4=GDJ2~P0VjedPO z#KoV$^3lTQIPc^UVypx`uajS$T+u+y;ni>tF>sZkuPn%5&p&`+_;dr z3~uj2VY0{)7D64w@Han)O*$$azI7FkTtGZvvzi*eZXE8O5}QH`Ztw53^#&feD-|*7 zuSgG@9*y@DfdhDTr;BS+qY*7Asl4@z26aKQpQqaJ^jV*puJ@b!sY^IBb-=Gp_)c)- zvbV4vkM|iuL7$VOk{@scx}|RvA1q!EfY(hL=ZsZtxn1!n)>lC(w%;nv6ewddnN6hP zLx2{s1F!b{5BR)$K3kt{&nmWPi9V`KpT|)XO)w%_dSjZ9+) zpCof;`h-v)3TNN+R#dpXJ;3~ZsYv)BX3w}%=$};13hLjE%S}SMq{O^bsl`=K40RJ1 zQ1CA<5fK`2_qGYnMnR_?*ey2c2ktOQ(EWJSLDXT4+unX+f_s)aM=9u2-$2m<2PZ7m zcxk!Epe+|uVp^OC#r4HHko{OI&)(AP+{b_j70uxsRqGMGk=P($Hvlh;gwv&?te~3c zc1rW7vM%A}bkY=Ut}rmZP)^ASHk-_$Dt6fO30=S{`0|3A_(u#owZsawlETnS_c6hfk0a=bk&Kw9gU^PjbSLm&tNn8kk5zcok6 
z;p1+cLCHFSED8yMIRKA4MW8HU`}dtmZD+?2Vi{xJ*bKlqkWH>EZIvnZC=+fHPcN@q!N}1W`2RZ_ni_2Z3JJ`V>HzWMYjH541yp0NIUWq( z>KpoVD&sn^ar((~sOyY$G}k;1w;Ue!ITRbAG$~;K;{dt5oG~ zN%lzay}1v6zI{DioCVIu&&pZT7H-Isfmw#n#j|Hf<4d-lpOyw2zwMuF*wqNEI>S}c zMH-JlpPGIW_)m9t^S^(}0eb0U#mG$hRH*TIs%@Iqb3PwfW0!AkGs9SuDdq}6wF@6!WH8ld{P%7&o^62x*6zf-cjz77VujsKjG6~D;VnOTZl-kD%qp&jys3HXO2Ra z83ld6K41v8G0imuznwT$sEO2F$)1y<$au&g!Wh0`LbJg^e;`@oLDsZ9RFjC>2XouOSdRX?o3eg( zeaQqsyc@e;O*SbcL)~JEOGr(~N$PXD^+MZ821(p;lql?5cr-HQL!$AowK3vt*dXvz zHYO~b0AGgyc4CWSbUAoFJ;DQ(Vtl9&E2wz0@EAqUL$UR$r_SbSf2nrwWOkeGr){t^ zP#jeH0A4zDx37-=ovk@!b)>taq_HNqrP3#l)X*0dq2AThBi54C>sOaHscWsdp#Jeq z=`{?a9kY1R*7UI8cLvNQ2+ga@a1j*@5WBo#z@kqzUkuOv6(mdp-kTlk5}HY(2BB(n z0@9F=mGco}8kTT)e++8)Y$W5WYGgA@jIg0t8CCoihPMV?%WM1SdX~V0HvAK#Su+|$ zc%RWFuT@Eaj0PRPR(~5YkM1l-2xLx0!#xQepGqyBp-2qX4}ssKTZUSq-*4{Z&FA0_ zklZ9t|8t>e@ok;j=orVgk;l|#LIIZ|Z^n^VR;p+bj-(o{&e1*ubyj&kRdn2gGlkHA z0Bu`L&XUqsp^OTI{b63|fh%R2q7sb>(G6@faJV$%x4~errO|sBx_@yY8+udyo*wg3 z_D2YYbI;2?+B{-2+eADKeWK!|u!!gZETh6H{)mHwm0TW~jG1#wnGV%)nbk22{-Pnt zm@-Wvu)p38nIJx`gclf$BcD;ZHv03nfklZCp8uSE6KnxvdIq1-3Z@PSC?prbD3ogf z)!a~t$g=Y;&qfR@yQ(9pG-W8iDp=3n^f@MD+G?7aln}(nKZz~`UEM3Va?T|I*Vwjq z&@PyMzfOgprkC!2BV0%up}YOF#mBY{0Y=)nY)YH}ouc!V~C!vP_;*x$|Y=Ts&v{?5FVtq zVy`(?3|h&nIkg;V0e*{@fDA!E^<6fnlvir=c8~*H4cml&oNoPdqrT`V%~nf3@0qy~ zD_i(2)dGjJa_?@EyXbQUd66I6@L}9}oN*Vx)7`v_C4x~P zSu`feF@@CbNj!7sqM`kVi(1vVwe>!;!kKE!(A$i{U7fK9Ji|i2FFhpVzG7)w?598n zxzbb#l0dv#Q`|Aq(Fvz=%mg)5%DavBD=<6pPnKTL<`QX}?umJ#p({3%D~UYYejCv5 z!R++B%wBHeL}7{R>47Xjv?ZrvZ$vC>ne$BxfG}QM29EAk<+(x!&eBr#fMkR#3i}z{1cdsu0M_ z?2+57nu*2hXPQp_YT00CkvhAvtfDhasX-}^fJUtnyhTBXqG31SpzotRt-({FZcKN* zO%3Sx+#930I)kJIZy0cyzn&b`&X@h?(J7X3$fkT>uHR1aex7i@MUZK#AD)OV*Jb{# z>T`aTPJ0>-+{c!V6bo*^IA8yt)(%Du>?M%SA7%C>bN+?<5_@EtaiByqlMmcCm0oVB zWs*50=h(K()!YEUxykjD+8(-IA97=s48O+^Z;Z*&FHhPkVewC@-KJ7rr{itAm9@V4 zR{@}`ZvZ0mIpe7eTkoE7(O}NiTQQeB;9d*{zAme{fizZzgZ%&^-`2iYZQ7kntjJ-+ z>sL6b)-ywFyP|??+xms9;rZ=@R+Agpo*qsukK0oy@c|J)?U@&c%07^VVN5rCB%H46 zfh(9AG~3{KI{|KWKY_!S4n8zC$hHYFw{}}Um(Xii=AF*}T3~NYebpT=+$?9CrLhK~ zePlI8xBTi6owLY#lN){k-CUkETwLY|B5`$(7v5|Y6fm;sDMLyGf!QSrK#!r1k0R7& z2uI6kUvFg+D;T@3KML&JlDC1hYo5;#%xzsg9KzP?XF15gv|3`5qeq}xMlGwgoB4t~ za*_@D-)NEJztJMv4@}-62ZS1stRfGA?cn%r( zlX4T3nw6f#>#Bk=cIACPVW-^?Iz*Tl1zng(De0-O%g12sEaTh#${F6m7n2HpAVM!* z`EC0CR(fvzl0l~TOO?i=2?k}k^i7YmHK26h7`kQ4)-1jB^2ilb)kPD&#paEZ>32j^ zEu>CMQ~ni)yHLYkx)}{1k%=lDIe_c=5rH!isuPTsr7#KA_@KIJ%xM^w@liK{jFD2h zf#ow9)?-*&X~aOmH56s*uIUxG=6vx{)vq3fkHZei6IiWvk3qb_}T!CW>I-mks@Q#IfgT?N7ii>iER8*xviL!CNp*;_} z$fprl`qw>B-8a!@6wxug9>iG-%Z`i3i`tP8Ps>E#txtUOk5}Gz{Fe&6(89xkqnLgT z{6Wb_)RI^ZVEPY$dt%VA6^AsvMe@8D_M7s(wBvXcJJ?K5v7iAUHZvxxkwTi>gW2rL zQg>Vo@*ckhzEVZCIDeIh31dBbjGN!^P}-rJUApn?_H1WW@YPny2pSzqDkgIz*dODj z@4~&^)AOE=rG9mn^N&;vd7i#-RQ@tfzcGwTyq_P5KF%0m_54UsJQmqx4T!KHB?ldAW#$kf zxyN)dmoND1Zk>%z%IUqt$srOb$Y3|<(W&&W@Y65Hfb-!Wyz?)86Vckxg2o3=;;s%T z@ii$b8RbW=)(X-#aN)&;yyCGBjEVqso1C(!9GNEBTWb60UA7UV(C9 zZN5Oj?1bhrE*=%;r4K6MEpWc(JY=^zYULhZ!u zQL#VBZ6+B%Q|{zPsy+E=3CE2f84`D78bt)~O2gE4utv#oD4C^*+#pp+CkKj}m{f+b zK%#CGdD*l$$PiiPph=(e!R1XoC@+jqKjZ^f>IfrfYXUnhoBHU~PSKOK7*=~j;#LNs zBoK5+rO(x4M(xf^r?~*Y zWQ|kzUSLnOKSFx9sUuaC{{H-DV$0uD@%H8EO4&Oy>uDx7&lOiZ+-^6ozcfo!B-(R% z3Mr`eex~P=vr+sTKljrGlHIP5WskWu(E0P%bIWq~s^S^d>qBp~><~&7U@Y~R3Qr)^>NcdIThxr~WU%@S4Y+Hra_2%Nafi25`Vs$+% z;=ftKj(RLY$I*TclDM9;Hth{r(a8~kL-!qn} zVjP;5*9?h5Bg&HW4D&k5K&rg~Z8*{gu^vAl48bCC9X7Motu z4gcYHYI9GC2mBTtMMz-q4hA%(crRy@Q*A2gW;x*|1r^+idoF*E(op;50MzcGC3OLb z_L3U@1!+%KVsos>DQ_j(Zk$T;n&({j%~ z%foEomcHT@aiQ}#M13f>_Gx_lA(9qqpZVY`Q!qBHm`5EY6L;7>UgKS2urtd2DM($? 
zhaP3BJoe#&BDQ&7PgQWH%s~}Vz=>f^H(vWlQ0yE6+F=T*-`f=j!!)A@0HlN3Fxi`_oXag z#6(znSMdst;P{ZLBanm$jxQ$QyDhV_;)uWnd~Gbjh#7e({BCkH98SF~Ui@aTQ91V2 z>4#zRH}pbpD=RAWW!tuPUx-&5ZK=R~AgEx>OdOp5!TF*AbEfvifl>pew6^RHI8XpH zhQtrWj`bEV^z|$JyBL=YIv2Lef7S(@BLkOHP`9)jXvd@%db&jssKX=ni-a87Ff(yt zxLA8$i>w^8XAh^qAHQpB7#Iv;)0rfqRQUd`_a!9gjfv4pw@oHg+WedW!btu+o-etj z);6a*%1V^Y{eUgK7Pz`x#f;ek`EBkyxjjI8j1Vh6na?|3_+8JW6t=EIfXZAfuw9C zQ0O_D|09~bKmb%W?~n6G-*!z075qCQzuNM7tU3L9SM+ud2Mk#_RXDuH(|A6EtWe$! z(erhnL6|RND^wwzeZZ^a`Oi&E2!7s zuVYc=_LTA|T}OV&84*6b926H8;J%H_=1P5?+0$)+L_IT9`vBJ!tbzI4{H@Gr7zmvi z!_1ulduFWe_$FDTj4N;wz-g;o?>+baIQli!n>5;0Di!9dGJ2^er3v!Pk-`$2f1-8g zvSzb>#s%nX3_eL8XkkrZHt2*6Bw#428g)rRGHqcxe3w*9V>GPv)_G(-<>ki9m!4ir zlNDuv)YBU<(q25wvxcS+9a+Wu1vLDoD|3p&NzUL+>i>j_6JG3VCbFCoRj2pkk0y-v@8LQ7;feFj1?p`L@|wR-Rl zJS8ByUWRBHOE;z7uVfdZ*Tk|5q*}F%a*G6N>8(oF;>52Sfdu#<2bpI0l+=7a8P%9e zV!i9RujJGd66}xK=~SSl6UP9F=UQ>Oe$f1$L6BD6QsIDP*!^A*`GzAF%n_3EquF%E zMj05h^KR7suw+mihoAYMtM|L-FL0j8b`5|<+~B+F)~@HnUV>}X5gD$-C1IeiTIE>) zoF>(eHx)^EHF={rKFPl!>BQ3fP%PU2!SNPNfo$boE0I-vjS3QPEOp62+l673rQZpi zi-@{t%v;58Z=~RQV!1F<+%|@$&HHC&XSaEj8_tu?wHTChMF7WXqrtOi)oMx=2@Vi% z6-lR(I>b6Uv3Xj0$FgwouQO?#uSX9=Edv@+o#41`$TJ6({!rDJbMB@DYF6lJ@0DlT z2JK&iLqr}S2Bx!?lXVyYPomvlz{B&1qckCL__C?14!53Q$|Fb|>x{hTaD$WdOqt(r zD!?ja3SUVnHIhW9_+R^x=_9tvVrBra^68;Ik!ElMwX)O{sCG?k|M}43b%EI zJ+O@68uIp7Q;kZJNTB{&{D1?WG^_Xyzupym3rKD%u#@rM$=gz`GBIW7Vf>n`dYuLLXh< z-IO^mt-i6?*Mba|J!y&wgu8mb3?h`|%GEBQLv^G>LwN<6#H*vBMu*ehxX{2gbUz4| z<8e6jX4^EM`uHo)0uPD;^a{{*{ldB#9Toy4$Q0EM6NFzz;L5A*Eby1u^l~Ms41%9S zMhLJlQ$zg35y-)?D~okmDqjM%(T)b0F*eHr=J)J|q5;0Xv?Fs3aDU)S)02)GcO z^9*qCE3LNCI@N{eYHO+0p1&Hlu#1Q$le?a)zj{3x&PER{Y_gf+J+zQ`RuO(WP^dBz zqCGd5tkjwtL&N5LZO^$Y&EQFumGkZUmw@I8W8jO;X5+;TFNerb$%za--w&s#Y20+m ziK;z=KZJcHtl25vOROs1mN9yP8LteAH(X$j49*sKen}lGrRg@;oCV*($I0cE5~zgz z@xOQb)XCBx)Ej;qh!-$VYI++8Hy}(%EJ0cUtQjHj@#}00lHkwo5hAc9O?aT<)2V>c zBouU|{>{_b4q4WWf>?++$wTmj@AUb_Q`_%p4_R#1__Cg$RcOgX0EkEJ2@OkQ*XL&SExA=Zp(|x2G$HncBs!M^6Ce?pSe0lB zG^5V}TRSts*0fzf*&2wU8vEpqxBzE1PJ=(U%n!gU?My`7Z3Uh8XFV-%Jm8YFKi-rC zKmQ;!IMFg78!T{CYT^LN)v8;Ob<50bH<*V8qn9W;ZZR zfxIY300yi2ECXjKZ=lmVDWO#|nGKE0av&m>$}VKOQzF}$eqC{*m*OU5_(2Zd{3UaBok&}1iV7v>uu2Ujb7)16Ah-ZCWN<;L%GOSpIqoyTAxdKE z@=@s}RUAcF0AJ;HfC*iwt$=?m3^(k-Gz^Cv1yf2!FrTd=cd$B~kD+7D4YV!>yDaU- zh!t9K9{ss6W#|o0HLe=8;o)IPF|X@&w{r%MXD}~ZnZFZgSQ5=mo_K4?%I-kSB|AZ; ziVnHlm_qhvUM&u-fZdFzjHg0B(W(c_JM()Kg)&7Jp!kJyJ&T>#PYEWV=cp$G&f%2V zd4>0cJ>EIKyFQA9NfVeS1?oU7mRiURxjb}DC>7P^_-8#xu+R@Ukj2Ih7MOx*Bre>o zlD#U?QBDRGxUs@sL|j-s3Kq#Hst4Z-0#mC+%Qt3&;ejV1k^omF6YSHQo`koRHHKht zn+F@UWK7NrX_Lq9&!;~L;Da6n zJ3~TzOngQChM;^Ag$NWGAhi_L6z*Xw73Cn03}8cZ{k1NXnt#NKeI!SDM=fkigkv14 z2+=xmX(Y`YzfoERE?ICG?hUybLlX-F9?bs%;El`IxOSynm!ZU0=oPE{;fwCHDb&TH zITQR+;#MEDB&?Uw&f$O_Fv(?qE=IRmkQSd(#M3_Ew?okZr0tlCLj)t_6cs?o%1lmj zbQ`%yRid3*rirm*HeRr=~`&QxAX;gkbP#h)2Dwm7RdARE^ppCNTgR7uLc~9N$~Sciu%2tp(QTPN0&1r&E-N%P zr{Y%%I}A0Z%(`9LbpNo|H)5!G_V34dOXh6Axv-m#XJJM82FcDl3TOnEY_({X0Y^1o zyIT^Auc=i%QCdvD;w4ZQ+?#ak_a08De8wsouB5YXeU4)%Czq2Xv8&yz^}W2kK@h#Y zZv=GfTGZ=f4gZ$U?PM1<-kcgGdLwA{IN3YwhrhiN>kqUGA&H>Xcb*lQ(d>!(ZnusVv{uf6q~nb;a5laIX*}9*X8E~SRy3lo=g|N(M+03?jS^I z52p<#XT<|(tza0>mdaG65kvKDWeMGm{3KnCj)>v8U%ctcFzh=IsPlpwIb)QheI?)0 zy})huZf)mli`nENzAML6#i*7!370pd#5Wjw&#GQXy!A<=Rvh+GW?7tFQvFisXPaW5 z2?DKA+@PI~mv)7KP@}v2rLiy>fFVa0Z{-yH?zg1X`Y;|&Z+N{J%eS?(&zWV~m=xIlh?atl zMb#b;Vj;FZQdHMv(5UP|@+W33Hy1g^_dhDxOnIzE88iL@Gu)b1w@BC`kcdx`cPMjp z@sgZv;RDPbYJny_VPH#zYMX9uJLUV4+gdtNr%Um3+x>$}f3%dcPE_>Mtgo03@3np#hN1WH5=b;>1GJ6caq$j3)acjWXh) z0Ur_^2{2Fp?5|8W{J_(;_pid*M`9*T8uOuHh!y=3W02!+`VKC-HetS%m%>f-dD5rI 
z<7noU9H;TvW{`1*bUEn53fs{Y658s#ijZ3!5`7$in%W;H{NAvV^{25<_8Tt14p=%E zIskACBay))VRfM{N76;ivK&ix3S0G}yBtnTuQ98FlCi)tNoV$1j#);-B_V#UhJI+$ z$1+$Hedk~zglh#YMU(@HgqW>=

3JQb4ItOC+?sjE)djIzJ?wTh01%i&b17Cb7AZyX=x@a*L zrQ_+-wrrwEUO?YiH41+l^OcO55x`gB>;+j=<{1;{hf_86MvxG3qY(gL>nu<2KH&oI zWkm_K2}giwa5kO}jzcua3tAc!>!xm7+8n-u9kqAKu*CxtSgzj#a1U}6F?$Yz0&*S& z{boSdF_k$muQAiw=l<`vKgATM0ao(?ka?ki>;S6J&1(#zrvrbDh6;ehYFUi^4VM7e zg`OYeJo1MdA!bp7$aDhpb`@$D?3ntZ#W^2NFnsT+(9|qeXqhTCSfg4AW<2+s5bYK~ zUT){nj+YQ*V~zOHpVwjaa%wOKNN1p0dOYo+1Vep9YfsEvMlK*~GU+W=swjT9ZOU+}O}z(K1_x|u43#4#eIGJ1R{cTo zyU?=CAuXbloZ5~?L(z=8htFEb(ouimz->~0pF7IC`cPM!sGDd3^}^sLyEKYxFHO^DQ)- zbm^M|Exq^ortWuQ?hNk=+yUy9B4RE9MxC4thjpmkhZxDxdO2ASN8^(R4A%9YR=c9B zDF4uc#|ggEwv-9i6>S09_)q~!;%AVdd8*p*NpFfphU;Oh&nx~V0yE9&uqpATJ^{xRHj&_}g5wv*;|_6B zxe13=&Kq{_$9PN#1OL*7oe&+QO`HIAM9yXfq_Byp2TW64zkCkVC_t=Yb{hiuV-o-$ z-H9?>B~oSnMH3Tf6ICvUgku&U%M0)vdx`hPByB6mrT6|3&>u6joB9ysc&Y$}oVq?| zOO}609n0BTd!*n|jQ0rL-)t~ZZGwCDZ*qS5!%9Ok+MUAZ^pzRu4R+Q-q;DpxF$4#9 zxf+vn6-|+hk67QQIA9VWQ;epxEVC|dU9iXn14}+&$*oVt!01qiJ3>QNH;X%B;%^AQ zt)-}q!D_>#8KUH(sRf4%eTG;Cfa61r8O(oX1pf2U*FVuP?&%ILYh^D2WQeQr>7G(I zlJg15A_4{_khT4Mn6CE6UFF|+DWzp->F8s7&P4u|p=Q7*W|jzGodb4VPD&b z0{uUe#4^Us7g^Dc31)dw0>Gg*4+(#i=K8v8CJpy){!G4~FN9*ZqP0Hbz1*qlO*( z7zKO{&n~-l&PM3|D#it_ZUC?s1%)kI=VP+xDoTi+%`W9;&Bj{onzZNdB>{g|J$Sl* zyCdvR4jdj;p#8=FEC_qTSTK1_AC@22<*}n#pA18_3r?&TFCm1hM1QxVYn4EoLr zUwvBf)!Pg1+t)W2S8tSF3BeL$?f9T?en~Og$aF(ycN<=YBWa2n#ZCBah;@98#Y72`$t~gUky!z}& z4=crtkkH$&K3x2Vq?>OiVhKFNT)JN_Wwg$-wz`z@x;_;^3e9XH{uTkGh?(SSE^auy{$_4nua>7L@H z=b`5mQuPGh78@0rbxO_sr4sIZY8t~qO_!T2-KTVHeBqySKY|@Oc&5>uNfSS2O(5rr zC0coVF8R)xN&{xqU|udCW2%*KN6!~b1+*f_bRsD(^bAloqs^+$SGy;KqgX^f{Be!7 zk|%I>3_v!;p zrzMv12O4D6;ehIQZQ2(1xRCc$TRJ;0!c3L4X)0IsvVuOt`nOSFpWf9KE?&CLT>qml zl8y+SgQkm;W?U3tXNUskb{7#dCYf)3gR~kx9wWvLD_@A~lui=aH`J!o1eJB`pe%KQ ze~RHsuNd|}7wsJxXC|MID5rA7*Tpt3kwIG!PN3QQOQYB6zRMXLzm3-a1tYn9*WfxB ztj>wBE4TS?T|e8#Yp*lS7fM+x4%O02Ow~!$Qa|s;UH`dZd2;J_#T(W@TZi7bLf|j)CH59@ok;| z*%kGII~*$X_s&1enC~6Mc3TXd6tMctOI%nyy;F20U%a)Q9ox2T8y(xWZF9#)hdZ`9 zHam8AoQ^uSZNGi~=X~EyT~&>`s8MUp@mq5~vub$q=kPaJ<13r|Y2hq*q85IWy>a09 zriLmyC%I(nciJ=Z$EoL1x_5d2&f+iDCMXA}~T-^MhH zh=tNT%Qzc?)s`{Rf0$8|9mq`&5G0&z%}p|+n%Mojmtj~O;Z@1RE1GI{mdC9NJdX|R zc5RGUQ0LyaxXHF-^ae`R-zR7l%x(7VF4X}uQ*$@ zv&oSG9krca#?gE@?CfHD!UG(oy}hyy6jg`uxIzYSBnm5mMoW)fW;X9C$Ju&10Anqu z9u$%%-*OI7M=UYUcmZujriHvoF91#Tnkb7C;EL9d}II(DkRV3rZdF-AGWy7^f6N7Bt`o?X1aOu*#G%{Hsq%@1F9#2qN zDYra95_Q}sfwJo13-{A;Q^lsIqxR9!9lo!`hNzT#+0t@@g-nN8?7QpdVqg7_y8-!3 zvgid}xnO^D(`kQ5RG{L#CAnX4zmVv5aAmg0w_s>MOQ;UZdWadlH;yCcU*g|W>2I^O zoQ^@1yKve6;Nhu>;Ti*fLRxbU%k_4C=}gXIGE)BGdJWAUvRE9lIQH;iT`Bq~bRQpZ z4e3OPsI8#syEg4BgwbnylQJXtJ4ZX!0B968X5==srs8OwfoXbN?wq zD=F3g<0j&$bY2DGL+TwKZQShk09dFkGdgpZZu)VWNWLd;aHfYsU`<4~r#{ z17kfw$crHz+s|ke1S~ONbW44+%o$0D9{pwg z^^kqe#j+?TfO5n3%AN6+CCrYZg{WSt{@7A%$l6}zU``pfV3yKQ$Kq;0O$6$?79ez? 
z&cYuMPlyt}_^J+CqO%OmEo>pKqhgQAn!8J3_pM4eCKP;!LXA#J|2%nYW;kdbTZ7UX zz|jkCAWlI-d1pW)II3}}wbFYwjM(O=B5`8=1aG<tzxx)_M=6g-25e`S5Vcd4L z}%6q{e0)>w^7(I+8m=xqD;a>Uhh{p^eEP zMf=qkJ7U0QLYq8^)z^g`8_rtwD$^oQ9HCrs01q8DZ{3k5A%fM9WBViv*5T(4ZJj{M<90O_`<*Ev~2QCFhO#ym@uT5iW*9b$*fb@L{ zh6f7T0H@*5qVK3Nhkvo?VcLON@SGhpmBfFChOQ2f*~MHBdS6RpFm!b4T`7ikxRC!v zXr*(5%44lW_6U`QTNcBV%f!qefRmpNULn2=lRBIO@ExAxU*Yb3Cl~&U!g-Sf60Z~V zW(sutE1Egw0xi5GcTWcY$vrf%7cEn4v&9F zkza}SjTKWDM}A!3P3*kIkG?AmH!d!=(ya|)Fnd}qX9jFLdosbT*QI`1F6A$Is|chX z;-EpNH}C#63>FWf073~jk_1R~Zo=`*-BYWlRoZL{64)%O{l%U*=m!Z1XcO5N|8Y5S zsvDpQm)f&bAv248&g@1duG7`g-q_o`-QyI;t`p?a`wko0Xyy~eS9scZT9XXdnyJm0 znytd!8Lky;O2(mbHlO`MKEfr?leKgfK@uxbxB-DpMc=o@2s9^l`dV2>H(0qua9*xs zVAIzYR2mFw9apq<9&Q_V$n%9+xA|e*#CjLRF(FgBx_(t0Akn5q*vsrom_1pR{o$pv z_bB5+Iap5X80Bu}FEZfYX$Nm_8)m=gL@goi>}rV1vh(w*b+#yfeaDZL@iy{-i_6u# z;ICtvSkNzcY2aNJJK*TL+8RaCm2h=h+Tm?`2d0_hA$&*-mD;?y-?MM4-NqmK_dQEM z*1oU;Mud8IAPeu}q5TfKvI|iVr%@OfBveszlDblzkP6t?rbzNwr#>}$<*yn&GP1+P z8xL0O^FzBBH@4d1sR2u+{X|d4zhlgEE0q@+){K5dKDJp)5{W3))>Yoeh_)AKB;74a z@;N@Wu0M4>(^-}viS4g*b0pW);|0Y6EOo(jP1q2gSY0Xmr`vnzl69f51cmD#P#DNU>Z~Xyh=LIE?LSlm4t|0sX zFrbSo2wq?atVCJ^C0b(s61UDJbI?ly48zJh-L?Xi>-BcxCAbMKXMEehQCwfLgSZ!Y zB*i+SW%X|lVzoN!lFk=i#_ zWRQi%V6jA=I((x02zu>LJ@YQ{;`z#cyOH?V=}ll-Xo*3q_aZ9BHCC#Gf6A_uIG82J zUr0;r8e`PNb?}qSJAC^iG!|R?7K^3KuMw>L*A$o=Fv3~6GeVirzu<&2u2!u!8;`#) zf(#9&<2jwAh<9@-)FPvnms_4LmA!2}zpS%=mMM1`5PNgJrxSc$#*W^Rm{8BAil*V= zk^z8*L;`{bOH|e|X+aBKP3y@R=+dMh%I&OwRH;vwn)*0%Boe*Vhs6506lhB=qIOTN z5uomQb=N}c3{oq_i$GqScplE1#~N$(rF0E`LeiUgdhO3N``4h?v9YB3?>p366$cB( zC_MyF#lIf!iDT!w@3j(vy`I$;;UhoRL`r}kgg$?DyabnhS93j=KM&oggI~8tkLpcs zK{a|AFGWt2h64emn|2=is~2A7a0?$*GydGyCDRj-M6Y{bQ*~Q8yoF)Ncofv6vsUx$ z1O7X+P{HErXb(UPd8=ZZ_;ScTEJTTT7kbG*VGC@=17gR++!BZC0ix6khZzjy@AW_m zBgXm7!zWNbM~QCFgy2z=g6qdbJxXa#n#2%M%U{OrdB~Br*`3@ER;m>RQ)UR(T)1Dh zmlZFcZsneYn^P?2c+A4n&bUmBD%`acf&jNax+m|r?X(KEKf5t^Cf>jobr_Xo_~(#+G+S08O-<79OvFW`X8>I#@EGA(9ef0PXyEQ&D#QxFl6oGqu|+N|iI zzb8ty3v$vzh9>$x3n|-fuY%`Mi*hn{y8H8 zp|wg5-Dxv(u984iHva>IY8&p%-*INamY3pf4hYP6xL`I_GUTtvOkuJi(Xk-dDy=Kd zfvHOtI+=s$?;x^(qm=nww)>E|^bOYsJIkYYuAwIkFw2x3#jR04H)o?)hQRuNJ7b^C zEk&UZoHum7@O~*U+!Z+6UUzm*5hC>|Ou$PxOL?=QE*X!sw-_9}IlIhL=A$5Xu~zsE zT6Y$T-xJ?{;&ny_5m|Q4I3A{)k;YNAFOb8cFe~y^g%< zss@yED)V$ZdF%<9lcT_4PhTVn)l|=ejF84jMGwlFVU^Z)P4bvOl+QwseZ7+cSJxsR zTOoqsQZa=`5&$7^i$W`u>J+MX4RuW zea{fsz<>FyMu{rYg9jz*mkT}^m~Bb<3;wM8A{KNo@T1o}1k@Sf>D|?8n#QkxP3>+s zY9r0L2lT4V2Uep{)_T=ha#ZrA`Ao~rK%AaS8r#R z9>YKJh2MMc5^Ue@eAeSa*Ls0>%)|;H5rf_r_}yBjszVi?@(O}KL3ev+%TH0DPvE!b zIFP|iv{KA~S6!k+CZwl^^Pggr3l1_V?h_u= zKrsyI3%6LuQBK77#HO3c+mq_(`N}YVstfL`!=o5vOEl`2{b1EfQND zjkAdoO`^xdG}xmL6J;q1Ii@^DF9B#|Ao}21;9&8M&L@?_k?lR=*M3VNl!Nm9Y(M#u z(YQ{Ck&Q;md`)aQmpyCg_Ioc`PviD%Rb}*KRbBQHvGzq;jck4s=Se_!ZVIzC-i*Kw zy|t}5L&Gppr($%jZF<6DWI|%bxU>xf7y@y0Po5R%Scx>ASV=5-e>>aF*=Ao57#C)~ z_cLLrs{E^Gb@_4n%KoG8E=NJ5Wt|^#uXlGLSCq{XZ#?cB6ra&5N?3nBJlep#uoS#m z1Hjl9<3}a{ew3QrBUI*}Si<5bC>4XSw%Jw;F-?$nR9u72WTn-r-XoS2xrC3diVK_z zC}znqZ?*f~RjjLv7J`gFDLcdk9M3K&{G$t%B4^Qy`8G5?OUbxI@IC|f&j8&OX z)&;zRZPts^biXD;Ej#ntIfEnp@$v$G>D4Dw!g3N4#}vrQQ4Sv63Y^3N5>6#27aumi zjX2P#c`AYBQ!142o=FDVL}C~u5H0%)&V?!<++Zv&UQV;a-K;fAs(-aq8XRm(cg*jeVwXKFi z)xH>@i|22{wmL!Bpj)SM#S|#WeH0sG#y=C;`02tWtIOD(9hZgpLx1m$(ncmTUU~Z` z2|#z0txTJ+=J&2S5DALFT994C)FEaAmH9i`2ArPFr@w@|zPS2n7+3@S;@lGx(9=?I=%uE+ahN1XiKlCD8Aw7 zjcs)G5BJ)$G+^@?XsQ>NTBK9U>Aj|@YA1u^|qStTR%Oo8s2(y=WOK6=n;@Gp9|)~=BD{xlB8<(i zv^BiY3v4Q5nTBQco{{PR*AJ3_wwyg+Y!D$D zIM&rs7{!9n{xtJ>GiizV#Jv;&F|BZb6p5iW0qmi7`e?j%*)v)$-#h_lfnnqElLAf+ ztRoR$R@M7lv}x0C1%W&;dAP9T43ExG#BZ`i8v0BtM~E)ahU3O3L3NE4Ifu 
zn%VIhuvZKYYY|{Wvs8YAcQH1LGNtk1ceJ34sFiY3SP-b`VQF|1(;pE&Jt=G8;k!`u z+ZxP=HX!?EU~s$F0!mBJ;Vdvr$I4e{5tEex=wZXej`iT?6A0y=L1-2>h;CV_1B?tD znfun5G+3D5@m70&7{*X=iC`xeRarBADG<#~SONt>i7Gk`R>&N^0hvMgqZ(&OLTuG- zZ6#{7jTxsfsWvme`-_j~;?Bt$po>tyo`v~~vi=tmTOCXz$U8hj-nkT!wjl57G3ltn>Bb^r)ad@F&PM7w&r39OY2x9#$|~Rt=&2rn6O` zU3f_~xt%cKx#aLsg9N{cpY~&QioAp-6X<0L;W-+*FxQ^oxLzRQsE*?QDwh}#H$qir zQ^jJ^URAfwu29Tu=smTmHaOg+b9SEDZUzO}0Rj9r-lB_6RhO@bwa+R##pt1IhJ&N? z=VbeOXm6|IDVgAQ4(jQNsfc#UT@vCkkG^u|2e^OIhq4o$L_=5u`+As+JxiM09qKiT z)1LJ5=l#st*@%2+Zg+<&SJq^`D(~wy7gi;y+<=-%BX7=6WViALWkY9-!FDo7DhekQ zu5BRa5#_+>x9J_=JE6b}$oc9F`o86@poS~@xN&I@Ws4|r%5!4M7#MTaC(<+oV{p!L6l`>Oe81j)P1{JvDLhR6XCel1m4C8R zCStJYQQ}ha8)HmNLGUIbrdL@lB37~(>>V8qP93LLt*S(_z-j9PCm^RP{l9+!Xv|RIz||GMd$bfoVZ&Zaw|dXR z=hzG3x-7H7u$mo>&}&PcG3Bzi;~rdAXX3whda>k>F)e^wH!zeEc>pYs7Xlm+IF^)S zyy=707~7jWc~&oNpuanlk@1*aU#TURc)G^L`_pU$*{>suP0Fr7-o#uWaJf zX1CAWpU))|nmooe+XYz24FB62f~GZ+^>y<%A`Gc%X=%egGGu|_Pr|G6fMPQuVWHd@ zLTXR$hBji>0-)@eh&aBCM|$Sv`!!t^7Ji@qu_O{HI;Z~2*E6IzIuj&AxmWx(_xHbtorQj6`!+Rbj-QIfB;P#aBB2D>JVb4 z2ESbz$Xcj%Q<|zIklKxrv+W&C@|o3hZ$lA5!V%=rWT|X&M`TU;dA==v0aEpX?Ela4 z09wt0M1?#tM|yhh;je~vBLx7p^~ z+)0Y|J1ZF&e>fl7Gr!qd9WfB3O#U8%nndXngq{Rk%g>v`6^UIYjIjN-ZzZ=U&6tl8 zPHK}C5q;8M@&<)~gQ$}|8lM1;+OGLNQ;8>#*gHP=S*G*F1e9sI2lC0kNkbeb{Dk*l z9ySbMAvUA8cn!x_tyM|X@(bqG{bksQ*-^w~##4xbp`hLvW!{6RhW>?qg%LS-lZS5} zkLNOCWXw-SD>K(rs(hI~8$>w)52X_&Imb_(8wf#~yhW8ln#uPiF(A4ST&lTy+AmMf z+1v3FLdR5V@N4(g8fZ;^))ZG=vGo)U0O`ZbKBRvO+?pm2wExV8YD3#CLDg<=);$Oi z)CgdhzReu<{Im#FMg14&Zi_k#P+o?db4a(#TQ>b*ACP4XnUdq7exniZZWECw-ec84 zl8b0va)kx5*qgGa+O_kdC&{G9UaZ=v6OXBQ_0M~b{OA({W?KC1D8CW*> z>bN4|JtZkbOi~nO2{7?(v0p!pXR_hfs(}<2Ay7ib7@BYMxnI&Q)$J{pTO6mDcMI+eYB-D;-5O5Djaez!hI)u zk>R>+p>T4SzG}tA;u-*D3ZC1(g~Og!ofqd3{IFOMll}KH|30Jr)hbo2{72aCTm&TP zrh~Ud6}1heI^iqR&db9{M`5(C`@j-mdAZt|MJ!GbF!iP2*V+5~ZFzTtc`ii?H*%h* zDDV<(m>G4sJ&xeu;qBgvo3G%M%Zveo%VPhOMi&#qOUxDoK2ycm$%u$nmT(t>16raf zi}MT7)aC0=o$}z%sK$fX8%$d5Br;7e+B6Q-=p`t1e}9{X-D#fhmx8KQ{OArawo3QyX93hE z=eFA}N-viV0LQM`RO~L>L7>6FWo>jA`rZ;?T8Or;U05y6nU(Z3Ou66J@_ygz^}Ohj z=3x|tr37l46{LOpBJw3g=%0VkPm2YIC4qLh7ukX^RL`3~tiLK}Co>%9R;U-cXO;|M z5{8S^`aNSQ_8;E^_zQAUw{cTuUq?lHgDFVc_(pI($D0d(-yS8dfO_|_z8S0MGy8o( z0X)7uivD-M5Q2xq2Iu1Xuh$rujyI9AHu`W`zs+Q_i1>#gW1mu!FY9KIYv83b-^d|V&)a0}R)Iik0A`>)8- z?M`MJnkm*ObagX$_esgtJhrRR6MQuL)MOWLZk_&-($w$bK(yR9`Ev<0Wxbu=9{srT z{_?pqDt9r@)t3jwZPcnYD!jLP(~PNjL6s>upu_b;{!zf?IAPNpfJM`#t|Lp;8=XJg z$#>t{Sgs49h>5NO^l~SSCLZ;{+5SyghBq42x1ANb_Csi(kCST$&Go~2_I}HxCl!Q) z6>&s|IzQqLMg$ERmCXSsFx=^*lQ(L)$+fc9H$u(gWpO*G^i+pk_6a_pp~xAb2BGt{ zFR(pz(boh&ICj(i07{REwzSiLm@q0V+qgn-#LfSJ7N$!HG|P9_Jf@O!&~D>$V2QOB zz0eYf!MD(kQW8*fU^k0gabz>QOk|KonpP&lyQ0RX{Ee^TA-dqJ@KQr=#kne-faQa95RLB| zvFA3E>X{D2iX5EyBJnIYV>p;bnPRc{T$?{9Ab+#D7q7g7kbp&6`*vPvI{)3PJCo5O z>M1}#v$e0xTM?+*s_CDr#0Jn`iYSe<=4`GVdDG9ah#OC>9WFOVU?98I7|Pft4Yh3E zCT6}ct>G`FhSHbj*xu<#0RK~WzHbinR&0-g;P9` zw(b`i;mtIL#>PezF}l2&Trh@SstPyxep;w`w(7YzArAxs?JbBu;K!Qwy-{`PJw2kZ zk9>|0VpoN3O7C{B5YmSF<4o@FiH6WP>61v!A@TePoE|R?w4*{N!|DujH~(d2iRY#@ z(DUL-;@~983%df4qVytlvL(mhKQwaE!uV?9f2M~Pk#zUsDqnAb7#SXlqebO8*mD4w9A_lu+4`LPtZ*Hb2;jfUr^Ve%-3Nf0|pi{_qnLY%7? 
z{d+)!xSlG!%13G73dmUNtFS(srI$QX4l2e>?@ZxLZeFjDf!m<A-CX{<)ViEUo#@QZnf%3Q8c|^LOtjLFx(^F};vgtO*j|uu}#6p6hF- z;xw?J(huaS3~;5e;Dv}))RFxzO15;m=qM|u#@2DJ7(%Yyv13&mVcZ&*9m|HloY<+M z;hLoPzo}WsGdQ}_P(4UNDyj;z^KX1r#Bh5q84)fSs>ujBgplL&QFB=Pbl1T*Sg^or z2Qoa1lXP}(bu!(Wg>%n9Fchge-|az6_^!&!zZ8oY4O-XMS)E{^_O3=x)OzK#&e78s7o~v@z5}M z060P{`h=8Z-WsnqP*1hu4gmpzzzC?#8oJ{q((z?rwV=`)87(eLYMy%L`KRK(a2uUA zV63XL`U~5@eivV70WZ94a`+d7rSE#TTEH*Lq~?GshR&|dZOGUl2N<7*Y_3@-hp0L} zV<|a!Vk{&%@AEv868~NAs;rfnbTBfbW{(x_IUd>(9}Ne#@8dd^6Zc;aHz1&*WF=-k zH^xg31B*s+$q^i0c-1Ng8kR~o6=U*r*<%}KiEUx-lw2zUObePZHe76DN znudCG(3osS?S679%XXAR3>grkR5l_kVN+OIUfmhV6oPi6YT^0U@9l5k+&XB-IGXg> zXRB4)?Q}4VI>b`6!EBF|DtwpDR9xCFU;EHNH_d7NuK=;kjDnukuHI(&Eo98Q6C<~D z8eude@R{GG<*t@cNA{r@>N=rhXm+>#(H%?LjpP!lJ|g0dD-a(+sb@gE@XW2P6>qo% zg3KC5uM*^g6r2n1->jUOrCI5{8z6ORrD06!8zBhE{cdC~$I{{k?(5k;6ip4cR;UtZ z|H&8P7Mu1k1N60*8v{gfPY0e71N0+z&1fnbO6D)S(X;vv>S5A!1%ubhcGsR{*YXR6 z$coi&TW~gd;F7j&><$ot9(gd-U$BFtM_1FQntn!(;|R5++~GkDdiP635}W1)(_%a< z1D6Ua@h?}iC!Vn%9$#uJ>6`t*%fGrO_2l0Bfm%Q4)MtsaSs*lufP)-o+=#(5sfKah z0Ok*I)8Z#h1mX9V8A`eZgT=ydNPrP8^XqKi+q-kq$;`!aF)^^M8dfm?l3dzlUYBvD zI`~&;g)qD=fGe}r9xj*PX;aN|b~2A8o$(G~s&mV;r}Jj-Pa08==8=7O&V|NU3 z2Y$7aB#ur9;=1#AZ0d-!J%p?zxRspjd-G^$j@!>^g8^XSNu_l#n5OuImY&7Y$;*qC zXYXxs*gl(@paiO4^bWJ8_cf7$`a^@zwXj;N7-J+%8~E895r2`0fuG&4leDe*4u0(n z{`**$zEQ0Jpa)>Hy%{w!ffi0HJyLwaOWK-0+08clWnRO8Nv=@|#n2F|&bW&~nXLa4 z%Q^c~4*|HvbAbQNXXdaGc$2?sD{l^EPTr<2|Eg@b+Iv99~Tm*ei^1brUtjpf87D%U|1Xgi*O zHa92JUdUja&s*-^of(RLMpvo)pn~3o=?!T${f1PgpdA(T^IwY);826-{|?z$d6QA8 zk@zFbCiisk}?XU|6gpv@R zhGogyr@%j>jWhyh=aWidV3AByD(I5J!fNM!}$9$)m*W^A+stc z4(5#-Q`2Un2e2&dDYb91)ji}Mz{pQq+Ia9N#M`uVXMRoi)c_yIDgSiAA-Fi0>2Rd> zG*Fdh!oy{!|HH`^pWBDTS6Hgb7p@(4sS!JV#lZ&%V~ z=n$f{V+)v})d2e!lOYF!L#5QiIrOG(Gjr%iT3cnB=~7Jl3kTTlw_&01ZV9Us!ok*< z=bG?V2>}GGaZK6O<{=z-sf`wEfuky_ci|>Bmd)sh6TdE)_FLN8R}WMie91t2|N4E% z7a>L!vfwi&dwPIBY#4Ui;MB!P0kU%JDX)Hp`t}a!EEFxlVI2R z`+2q+5P8%)&uG_v-QJ!Sgk7%QzHaQkIwCwRTuMIKKfOCj(Bu+()MMi?JY|&nVL{50 zDX+j{-C&WyFZe2AxhZe#6rCA$UxE&D%FYtkm4VgGcTWf;KriKd!fk6Hl^M2j14i_a zb)2i|mJFRH2_KM#iq}sR%3`NP6};M6Lw$8wg?Nr^sse(|bVw?4Waud^Oq+`sF`x3X zY6y)x6)bt%sPpnkt<^Bp1u>5bxqifk3NfV* z1Mf+*{5x(L0e9MyMtZEyzxlbtts>AkH}|85_c&ktYGml8@^EJ1kOqvoK~&Zb|`=Gdw$t?kd{1zNgTyMTX`x0 z<2pUn!jT7YuW5=@MD9)o1q|(Xgi;io_t;it@s6b)Q zsPl!)IH2kX5&pNQ+rtsZcy}$p91w?eC>US$qqy=3FD5fWr3kU=cDMe(En=~^DjHuY zIY%1RQTdgvcq2`!RZB%f8k4QEi)OfAXv=Giw(&vU)y3FO0U22-XJEk8&*l3x9(<&5 zt^zH9!xM7z)sJp~?tqhO@iHsiEc>aO=q;`mLCSP`&%f!j zu{K<@x&hv}JJmTL+V{6^2%jsdvXrOHghmNUAwT1uy{tN_s?bXD4n;G_hEiiw;+x_^vdUhK$M^H@#KpvY4s_<2@ zY9Rnd($GFL$lZ~^=^moCboL~p+$Uk3NfQ{NaWDMxH8E)_45D-ek83YVa*{(Kk!wMh z5f6D}!UT`ylH^lp)#H5Wl7DstMj15NC7o0h*6_0U$37xCnbDsg*pG)pyJle&8d4>J zW8!vgIllzAtjXbXz$#EjI}Zkyn5Ghi`X>Xeid3KsE{jXhN$ zSs3)j2#E&oi3V~k+;nCfR z5ztAC0MkrhZcrz@l1NuufLE!*p4WGso*y6@W=JA{B1n=Mk`llGvSWrMpsx%{w%u+s zU358!9+yqIr`Q0uLm)kP!^U(jfRnVnUL|G4mk5vOPCow$s%M5I1l)lp|L187&)OO-3Z_SiM@kkfwlx7eF1hLT|r1SKvR2- zAS5X`kl$dRd^wp~-x4>LHXXN#9A&}Uit{k2Q|_P5$?fL#G?Yi?`Vpu0z3t?$KTnYJ z-(RM_+ZWpErO7a(=cmXDjtlm!mw4A9AZH&qY2vYt`kve%u<8xE5gg<1`*_JfbXIst z;t6%czqjV<*)WHu%ACCP^7j6-zOT1hL0fg?T$WJfTC5m#6Bwk!v*C67`4b5+i z-*%I>m+O<5`>S^zI>dEhd~@1D=vgyyCh6Az3?yiRxveKVKNXt|SXl6UJxfnUHg;xt zO8PkxgYwTX3fSUF-tAZt6@X%8<`qjLFwOa!yI-hYuQk5jIFCTKD<{soAHLO{0ncY7 zK$V$amW+ig!4N;9?@C6|yP$|L-@96W|FB~V*gN~%!BjaVQ=T|oU}fT14Nv^Gh~(yL z@WQmW-g~?rfRjUW&m2J`#%~^+LjJ2u zV2BCsFhtXGxQHiHJx_7-D}M;9FQ8%b3RuVi`MXEsEJ+n2ICyhuPg`I)h3JLzX>8XZ z>0*3+h>ily+V4I(&pjKPJuAN_ZnaK++`(?;a{A^w3^4=Mp8Jdgqhz)c#l(<^U3l^( z-(M?rBq|>EvB$8rhJOT*jK-Bao}~+1Jq@g|r$5%Xvq;SNys94Kn4>M%vN`=oT0pTW 
zx}O)iyIWpm>H%p2@8F|vftEZ_C~~m0d?d6+!A-XQkvz~esE|dGushVV&88Zbc{CN@ z0kHJK7;uE_=((cKH2k#Cc+dD0_d0MZfAh8LA&A9^O5ue$(`KI#H(J-T=gOHYTLbqu zY{|j4H;d(O5Dfa~;aT$BB&r%?fWW_bZbtvkjy}@RV7$ZbjO$ps@<#z!4gz+~blE;! z54}%zJ4;6$G;}W-su@oB6sN9kc={ik7{2gwotee<({qyE=5KX{$comi1pTalDjNRK zzlP`D&OyDe>8b+Xjc*##lf<>47gEoJJ|2+Gr!jK9!of@=Mgd{LwRFM6g+P~M=w5n` z3KArRS?bb(Qhu@t7hmU-wt0>`S&>w z7^GJ-rYN}nPFF|jYMbgu5X7p9qB!M_?h@K12W2CSoa^A*7P)gJl(8tm<40x=8Iw%U zpJSE9s?iJrEltRqiHumjodEsNCL_SaXd=K=1JBSjM{s(I9HcQN>317sOz{&0C&mrn z(`l3)2z^+;XWKzi2QW6X?Q{KzITKS{SM?JAroF<#9{o8L?8$Q|UHU^izeE>?AZk@%_bgBY&~YDCdH#E*_~kMm%mJ6wl^ za8EGF*gR{{dF{GNyx02}lNF*!_ZqRWxn$2AA%x&3j{yqPFkV6; zf+C4*jch#~Pb;2dAL$incK#iwBz1vdZB}r5b$m&R1IY}QaBRupH{`fS%bbS)Z2bbl z7j=G$W`#4EJkWn3H7w-2hzGGW1kkDhh3uM9-yxfNL|vwISEtMdP7?p9?d0ei1;%b3 zY9B$y08sT>;r*N|RwjQQE+@q7gNE!os`a;izmHl}wyf!xmi2Twf3S@fRtYg&>QsE? zCHB0zJ{k6E?d76fQae&Pk+83cnP-M~4c+hzbtS@ENdo_W=-4q5VdN;Yd*Dm+_b_qz zJ;#XozECG1D0Li+IMh+5qQcBuO(s$EC{8A=v|c)Yw_6eMV1bdUv`CL7mxx_}ck_H3 zVKOm^M_|)ZONx`qH+j_Y*G3@Wi?{aYKRK{b-{+`SR3G~(0wXUx9S8c}g0QtB-l&_h zRH&DczsQ|!FXH4KE?2a+0o={-jMgW}OKtALc>g4m^0cuV}m)=_W_BSn6HIwJ>bH+q)&>vj*jzzpyLOhV|q5okS_8=B? zhx|Ebf^a%(1|NxQ9nO_XCnD13@bN>Xcz+o)c(t_^MbRYt5zWRC7WJRqN^&$XC%*{B zKvZbgCsr1Q@AVra<8gr4{AUF@tnKuX@rdA4vVk>#{t&2OG^a_~eXhYsY=Aw}m|0+Fg|M>7*vB^gOl z!k@~!@k)3)DWFVP=&U;emy)*T*mTc(ct}7_LR%|fJUNqMxZcQPO zGSC3UHl{)JAJ=F}Ps`3dGRbZNIU7Bd-Ep<2Ri{<;Rs04*HG_*N&foC<5f6QzCRWm$ zLK+*5Ha6?i;KMkZs(G72+9poh3_h9?A-Y%T%1R!dYJa~~1+APbpx3$+2O*^cYpXXe z)(40Td)VwY?m*_7?ThVG%WE@N!apE0VRSL)_;&0nJPXbEcC2{wxCQcRa;6829l!lF z)w$iQ^y~>S6ar#Ca@O+`UqvUFo$z zNUI42HSBOBiWh(Mbi5KnqT-=(K zEIj0ka6bupE%xOqYhY1@ELmZaK1|@k&}U1mZ|uy?2#|0P4Qjr7Y7JQ7WVr-(do%6@ zy7O|5f5c2B?%sKJVXs3?&I}t5(bm}I7x43>C9KGo>7rIl%o~XqIjZ#ldq3PB&^`e_ zPs?en`X$MzjE-veCP)bQ&$6_UohX>AnrlO(#s+V#g7v%j4jhj*zh)0BggVB?ShTk{ z&j>Q1X=gA=_MR`*3G13j<2)Icezicuw25KILO?{p?(RXL5E1peYrJ+~{Ybv{CjJ~1 z4?&~Szsr{P7JQfyto8N=&UC+0X{QiD_U#<-&9XKl^-xIe*5=xva(`gas)J@6<{6$ma(T>jGeQ9$LI>72bOE|CN5|b}Kz{ z3T`G)Gs$cBex){xFs2D=r&{?rq~VZ>i}7RV1EyzYAF*&K5TPB97l7n zEioc&0-i@WYSo5U{vS=}7@S$OwC&ioZQHgcwr$%_?1^n>V%yflwrwX9eL3HG&+k>$ zwX18_+P%B)zHXW+43U8)2-0#3nlMn5J+lXCMnFJkOuujqb9?;nq>SIKg${V&9L=(!F0R9letAWVpy9mEsw<*F=X#Tb4-4gX z_e!Fh!^o&8VEbIbP9Jn~*l~^$m;nesG&Q&F1|oK0l#THbq?e7nUT^q#h~?q~iGc@c zG%n-QF806ESoo)k(a#{5*}7mBErJd<(z&@NY5>b%D)xsCFh1EJNMS=f*}J2PF+@3$ zIY+u_bUU(|s3Zo#i5U1WR)+CMOQvI8SosLDXrFAOxN)+nJ+VH#GSB>(2fE}P>jplN zuejt#e^J2GfK`|V)9_$wuq`rJHo22)FXCHZJ|xDA*Gf_)O~ol0oEy?cG#4E$RTZ3} z6F`qEmKj6~JyUk3$+)KhWgBE}b$Y|{8RkWH7FlOf8}_u5qP ze!ok3zZS4#;Vp@3=brt!2ubI07a!vTF{~<9tB<~C1481AR(R^^NBWrKG|h=J0rO z8oWFh4!}}yley5f9Z5eRkPeuB3+3XMss2l~9QgU>+DExf=t)m4$gZo`c$w1ThO5)U zYM%Z!+jBmRl?2#$ z7=Q`W!`6(5Cp^Hm=;CV1`Rx9ols_;&J~nFvMKF+#9=igCI-VjHR7)k`Ii=uJ!5_#Y z0KUuKi}K?JUI)dP+>yzSOL!c8BjA;Q7ZpjCMcfWSSlO4t<}EX0<6SQNGIto+9)HHF z2(*mE6Z!n!H~xUkAI>2hzA+(juaS#T1u&HMew`6;pK0oU8-DAY{K_N~+o3RCb8x(vQX-WaJ}{{|F+WRZXSxRH(3 zri3l?`!ugMXPf}XM>NOe^E@+a0Vv|~4NX{pkV;WP@sOz51@`V2mB6(PyEDaKL8SHQ zd-5d|dydgOW@^g<+hghreFwVsn8d!h4qt*_OCX7VTN7lq>mR4!~K5PG}IV-AND75h6N|ms&j7xAO_F*oEo4D@BGL0dMVs zZYEZD-#TuxWeZjSB(*x%lw5+;UpJp~VzaiQn%wYcS$Bd2nzdvSKN?;YhhOH9HV8tN7<*gU0V6DKg z_OgebbuWmf;w;+#>puN>+MJUgtqK2(q_| zr4ksTmce0MqlM~^(yQr85;u!AF9AEm;fdr~?hE?QV$NZiqoc&^2236J;6-T)AiTL# zx!L+5{Zz!#V*_)x+=bbRR4|eSXd#i7Y&tOl-eqMb1}ta4oIC?X{nFIk!XA1-v32DE z`o9F^qNIUgqxN@3^UQ1ek`$<=O(T}|)h$+P7fO{8S2%G~7+Ww(QL_!!?^5suSL*}f zuJ5bLue5c%U7o(So`$m&FFpCY^XJCeQsgL)T?Rx2?zO}`jUO_rSvF}7zSKvgM7Tq? 
zLbH~eJLq-IMFXgfueKdbFub(XDjTo>?=g_uG}TV?0ko{rm^&lNo>m2|9B0+&X|j0H z4gJz7ep`QJ&|7(T9L}p6Qm?O_k8nf$aG+42fl-A+>V#$%AV3Rt%NI>9rzQ*SE2g^Y zjZH8Pryv5vwQ9~GfXqR3(O{Fyaav2>I;yT(%Twr8l84k$gkyN@Asg_Hv!et6O~Epb zK-oiLj_dxyOnS&<@ilm*ysYhHXwD(b_Za5bhL=S8X(=hDUhX!CKx$Lga!(2A;-x?- zFdiJ$Q!5Kjf4kO}9{*}FD?%iDkh|m!cz|nl-Y3L_IL%+{+DEnXlijMTpUSbbACFr} z8IRcJHi*-A>*VPD+UY;m^zM)cM&Op~AVk@weR+m8fr>&P?G)_w({1 z>dN{JEO>XWyHz8%`{uE!cVcEiRJs0%MZXQTx_G>jnV@hrYLe&8oo%rLfUvhxryh%T zPuM$rox3&-lf2;3Bp9fi>jvYo#MlSlM*@M(Ei_HXogk_y%rWCS!*BTN3eRJ|D=iV>D7D8)%qM*H`|O2-}cfelhvE9W>#Zsd*yo# zcink$o2hPi62%0D+$nA2z+MVYt4l!3U<>XVW% z=`|rWqwIdL)#C<$o7x&(w4n)Yw7#v|xOU3fFn=Dev~yfW|4VNPTSr`o3kUBina{>72nK~E}aB!scPjR#JR1;J@O0TOxB z)n<(_3abO5aE#@Gvz#W|+~fACg$%TO6l9FB!Rc#AKEw_{Cq2B9BASTvg)$4HkJL(- zkky&I1BQ>feg5)+2I||DT4d3xT*?x2?h3S*T7I0T{1^3aj2m(P_WVMtP4(2 zr>hfglEnZEW)SS@TEn_#LB9Z3sAXiWWnd8>+Ymlu7a8aX+ z!HL^S1w;=be1EztAPNNpnc7_~$RQq|e*L3(yR;gyAl!Bm)Zgmm1=K#uS(ds|S-f0i zAY~6r%!m|aEY4jDWcfjhm>0S0gf*!Hh&9z^@nb+ zzvaF3=JsMD>&~EFV=h6$o1GK|kIrrQlh{AZNU|~^FAvUtjmFn^sP0?1PmhG4!E6NcbG$Fne zwM`$9$rw%*bvC?ZJQG6X%H^vNmzD+{Vo$8N@JkE6#QE^D~00j$*>IcylJ3`A(ZkNof#XPNv=O_D%fWWt0y#4&k@kip|j=cd*7bW&F; zTWahiO!a4LcrW~LF#D~s&x9oD5K=70@kl2`$jy>UXI8s;uX zcS(RE8JcDJhDx*+Gh5Bs=mRX zReRxWR_(FC&!Vv%5BJzv!elBKG5+g%S6Xtze?;f z{%5B#8_+>vL~+LL3N+%R_+93vAK`KcRy}aUr7P8{lWEFrjWMM(0%ZRzRw%4`kgw^O zi>%5GZn`LCDKr(lASB8sIl8>N%DGS&u!CdNLmHiEO(x6LAfRf82I(bg@Gn0~OlHso zfn(_qtKJL?LLohS{&OUcEGcgG(8=Zc2t6Lm1c06w#@j_28H6L3Y<_yk5*e2_|7>-F ziq3af7cgV0V39;MlsN1gFaW{?&EPcKk12ncIF`rv1e)+Mb5;fZ(AJA&0>jc$YnPQ0 zy_5Q&3HH*rafMQJk+R}YrRBQcD!x{Xr9|~FhHnEh5AzYm`pD_!>#0fm!10e7ITu85 zIbfd3xZoO9jroj2E`6I6BQE08h}jD>T;}qrGc3kU2J{+j2@};Ltj_>&)a-J9L0D_x z;yhS3eEJQwqK0{gYM_S<-1NZ__K zi`S-E`ZZU!04a>emhT4{#Np@7WSm3W2Bxt|OKPysHUOrXGTPpEu>0A-%PvjJ9njU> z`FL=$v2mip_i*<7uyxygk#q+}|7V#Sae5|FU%ybicWAn!!JTOLNfsq!NBH>Px8$)) zf_gaMT7iMW#GmTri=wd1;9`uEFZKFmWAHBHLc`5*Vo=e9baxPI1mX2!l6*LxbGOGA zbdpliyHq|l_uq#IbEp|*h;ho09$@Wkpe2^t_X5nuMY49HrGma%7@}Z!pcK6@G`d{S zvh|tzDPx|#ILQo+BUblY!7SNmkEk2XwNt^jb+VFG+tW6y#w0o;W{Ru%r=abRYRK*iL+mnIQbaaEf)KVg^40dQMs`?_SpgU!a^dc&Bdho-%amuqv1>EZBY* zU07yDk`FGiE|Lj2K3y7wY4#pnl0d2vjNaz-#toP5WRtN`Kx z5c(cbU`r8T3NncSNaI=S<}o^s&(JGRo|GZ{dqPR6c5P)EYtLI78R~@+QR`!=ob&|Y zPf|4X9!~MjL*zBEM`90pKys*uBEjBf(^-XM9>Dp zNm)|me$;R`bT1^MFL!^?U6dHUEYf#n5De-_iuYG#kiyjy2VkAcV4a`?od)fAi;)Ut zqj=FH{SpqC-KqgrnpwjNv4M+Qp?Rn)ir8AAGj4i zIak92ss51&%837J-~OD{rBrkO6gVWPuW00Z>kmV*r=Y5jyWPo3s5oKQNh{%R(@~dl zM0a^W`y&Y^Q%}qqMNryqu2{AGGfxsC&V%g zf<{ymdm^B_R3v#xLavu62tz}Y!4RjEmlFKjMnP9%l(hmsg;!^^8u<%MBC5i^bc3P< zo2>-@oioh`VfB!2i#?z<6tg-;|sOz_cVzKrB~tN(lw?rMu)c z2ydOo(;_5bG9h4z^}*@Y7`0~qGy=HBe`-Pcj(rH)BzE7#-(Y)Ne%va*1+hr@a>NJH zk1;W&dc*FaB842MW{#A$({sEfw*;2WSg!5*tB_2XUSe|{v{Y=s)%hOuZVVWm^AVRW zMik77yaX5okQUARrlQl*yt66LaZYd|56v3wfX&IDjfGqqMd(;`z$nt||D7ACFtHuju{S z0q^wCL_v}%9~}4wo=UNda+<0+7@6^yheSez?@`>b2I-j$GT7JHO0J?lQzoOt&8U_B z`?gs(=aNjE3qkINn%1To-&SjK@fh9FN_X4?sv=v8ve>$X(P;PQB-dnA$O$3dmFwZ> zfN08>SyUx_I6Fis%6Ax>Roc2a7%m_c(LtKGg+24KXXQp-!Rz@-$G|>3FdHlWofA2Q zl!W}f9cT7+p+!hrFJ;74lMq@%<8u5Zn%_&z*6Q3X!{OB+B|Poq<~Z#{GR=Z8X$zn+!&rwt$M2P{VS82jOQ{;%u3RdG9)H(dhOvQoveJ6(eaiFv(0-#_T)fOSJXfXos9*o;Zo=W zqenrnT%3p*TSjVy0#sq*rc`FaRSomV%>J6g?dhSg2lMNr|0<=Na|2-X5~4BY_VMrc zg4XoTw7#J8pM)#An#sq>RISF?>>|z{i!?7*e@fGtmTLmAopB6*2eGyL@TN@@YhAT7 zH^^WdTI3b?GR8|!mNPsFUcp>fgtQ#N_^9_S>#cO#E#^-9PtDpeKCJtuxVJcxh)MS; z2W2s+>QOh}Xv&$@5;TDQSmOklcu$=vh^&GS7(6JYGK4ah@M3l;X`6Qi*>ns}!{|%- zKl+=*bJPS%u&#+zaka*ZwmE!OaZ)p(7gZ~I*TbhkD4_!FVKXzP!^!da5vbi=@FG!A z6re~2ua3PLN>>mFCsAn)E`L}7t7%7fjt$r&Pk$cXL-wcy5i0o5P!k?qoaV?E7w?lB+h$PeQAregsW+5b! 
zx|>+qcl4VW+(8%jU=Pf8atdatsw$0jbmGI76LbSg#T=v^p5Ltcj@#k7(Ik6s-k4iC z-vK}SnA=u*VMu_)X$4>IwPBtq>s)`wVhwe6i~{%k)q=IH&EL`HjAExYF2eib6E2%s z!73b;vA4n1%3|VJO^Uw@Lt!WE%c=}s#ffz|kP_Nf zy{YK0N@wdN2^ygZlnr+@gy7}0zAMWfl7RQ>NrK6v0&M}xxsGe|ttl?Kb7*w$k+puL-LWu*hr)&y?Vsy{kjupAFxd8LP2&d& z*utfxqnH;>9GD^aGaUFReuW*ud~|Rj{u#$3i4Oajf^4i%R2!@%Fh=H8!wQSCJ zmg`!LMXb>eM>H`OxeYmsshHtnS1$s=XBqEt@)A}-8|;Zzb6E@&DWme1<+O!DY~$K# zROEQ%na{gF>1~Y|*J*(v4hkgPQ#YuD8rQC@%Nqazu4(2#rW4U4OE@AoMXXY-3FL~V zFkvAhQIaws^83a5pnrAn@2pcwkl46O8(iDy!c)qYK=D20QHttPprRv!g2Z58Q2xw` z$xIY5DfU8B0J~gzW*=Yl5Ie8 z3j6_rV7_qgl>R_ix(Z_SAok+$p{VLst<)M~j^jXoNO3t#4;licgD;z`ut8b3G@DGd zq8O1;kk?fTwtk23=&fsIx{Ua0STbH(RDt5aTrY@gRng?CYdwhU)9kH}B2@JW<13!VDgX(~>Qg#} zdV~grT@&P!aidQPYns7u!yJGlsfwCuvw$22SS?PDA$6oC?o`&qa7hc%P32uQkqLp& z=pjfQvjLld<_MK+Wa<~hPFbckhoTTULoUvt>g1x`#Yp$@<2P^uA5NV8j){al!-NAw z3L79GblS!U^3PO!cM40~ z_6f%IXAO%Y@yT|1ELTY!CK_7b z4zgNH-#^P{DpeiEzYg9hb{qzlQ**if>eo|y2|-f+#pc?*Nv5B1{8IAu;az1%EH)^L+mcZ8uN-KApZj!A;tWE~LYKU#?eKNurL$!6|i2A?E9 zeVmV6t)e|y>2?VgZ(Z?AVb|>vYPRIBYj4TMAwHB)vGjI6nON%K2L06KLxY!2i;=gQ zMhd1N=o)nf9VfUS^FLR}i9`-Bg`(CpqS;M_CbolSU_#IL0C5LG|B?-~y_Q7oz0$NN zA4J}d_j!O#x#cb`X+&;9@KF=nV=3q)H+**q(27Kasgf%hnhB*yJ%%YC)b)AHZZdW< zDpmpNfbiA(R*955cwJ**h91Jr&FQDznq5RKXPQKCI^s~R6qlYJ*Q7G3fa;?zcUD9= zkq4T)U?4_iw(PPQLfQ6;T2D#j04*>A!+LLF4I4mer4y>^%F$sLI*iS(`}I?{$3LYu z%K9lc4D_3!O?fr?n-eiP?u4QXIgE?l~3mNY)qHcG8=J^mDR?Vr4Ez!l8}ebs#;7FC{)1LFAomX zU2VWpTU;MxFHJOa{ z`P4Jg=>W-+=PF5RaGys18xk3Lrd)sR$u*&)XFBw|Koe_ZJQdj7rU;+y8Aop&#Mm%~ zP+UIOy>%nO*UXjJqDyg80jpPiK>(sAh9`UxFkj;YOjF@J+W|oEh|fH0Mo`?gozA`V8eRa6WptcE7FUPwt;20DQ7fRX6#NbG6nJcF_YjDJR`4*zBye6l`Gav* zu%L_Kr6~($U`w!PB7t3F^aGG9bRyNCPI+G}*m&3DPm#SVYx8mT%IDI7P6u!s?1q>o z(;}B53{09G>GDL{RP0Pn1^^6id5^~2o)DAOZxH!Y*ymW(MSE=q{_znLr25>9-4qB~7B(kYrQ%tgrvI z-T=NJN01+U-J2Qd~PI_?dWYWq$*L42=Rs44w7S+@6oh#C+j&iLh_DfwE$k-4@cUf962Nk zeFI@c>~6f2}k7vd3mKkE>-D?vFGrjEYh6jD6?t6ThMfW2RX$M^$Bt| zzFF$<2`V2H8-@@^B>Xg0ca~4Dv@=bsDlkOZsjSXu={8%+x;}ojUkm;{YsBAQf?474 zQfuMWe)No(1o|czzx{Dg?^j2+_-Hx@N<|WFe*E)$rezgl+ZMpKRLHZ7WzKGc<8ys? zG&KA8M{3x^LGPfA@Oad@Kaz#c6Ek~MTExZJCgS{jP&(Kxs)PH@~hk;-GJ!PQwxVEb3t~g$Q5_5|LB4O2d0nvP3RB*W!)?c;t1BV zPETnxFpNZkt*hV+Z)ws%SrVO4kWicBrq^M*Y(%Dn;UYjbg+tev{W~`E?=VI;!2LW2z0w@B32h_nIV^f(Z5$H{*aZ~RkZ|^pNmuHiXpdmw~?I= z1idsnr~66AgLMjgvTM1uF)JDn_H}uv4PtLX+!&7m(tPTmrcPe^m^*dStG@&F5M-Ko zKR$|*k`Itv7dtcH_x$%-FSaMBES%{4PwgB#Pa+kyg-FzCAMLTs6ztEzs=E&kc~ZTc z`9TFd!=lkPM*HtfBr(kKm-nmpKg?8AKIJn z!D42*uZGJUORAO5xHbAJJh4H&9sU%5MV$rHPp&VL>pUHOD!|@Oln3@X5Zn}LMP7H|p1)nLHb<6tGiIGKHIam@r1$@7~4O9DOEbf@85`2MUsbt;u| zV|G)_t!8Yc%e5?&!?UN+x43$lADX>kMSgkh_5ixfirXB4)rJ|-R{Uk$^-I}HDK|i$ z!bXINi{JuB(wgC*1J!&x86U-O2f^z}eF7^FljDMVPTE9K$*f)Fo??O>se>5;w*y=r zp~g#X6n8^2ZTI~^+TJDSL~Oo#52&5iTzah59^29vb4yR{b4VjpD40#BVe`QonpbB! 
zfTf%uouLJ{GPqBVQShBNZ9TH>V+252_m<8BYLVRT*W{v7 zSo8MrCp9T-R9)C`bBNDB1CP&d=&IXMyX^Dx^m5Zg=$zlcd+8ct^x_SFM%YcBy@Ud*j8ebCs(1eyIrG)_4CuiGmqD;M*B9lkhN|0{|}!Jc&9;JVMP zowYlQnL5Kcd!M50lu+vJ_5x&x(z#gEusE`QT-iBwuHvf0fosrX7v2bK zq6$U|fdv>0)?>HbXoLmiNM_|SXQlQ|wK?`u231^Q?csi$pDqaQ=;wTTy~JtU{OXxF zAW=XgkifLcE?b)z8Q~I`P>|#J5yw+9=;_IgseYYsH-7oXPlek(k&T|x95!??MaJf+ zRl=H?*s$uaCwIc!l(A~=-_bbkJ6NoXA7KkA#Jv8+)JhBQff)h}5fU9Lh;{E=*<{r8 zAgV3Ax14!x3NTI8@v9`vHu8D-lT4?}SKf2|ihk3&X+cjsMcupDjJI?;J~+xyncJE= zLXSIhgnD%Ik>D(v`w$p2#PUiLJ?Ulbx-m@~^p5xQ&h@h7O~xH}JNskH7Ihe+tzmT- zo~slfB<jiNCdd9V28FLvzFP|gr|tAciJnlcT8tF0)@e6bLp+i7 zDU+^hbh@~>J~!yR=`8u&i=l7{th2MOafQE?ZL&~HrRSRK9Dak|WT5hd-E298c`aqu^mXZ>v?STS!dLEW5;kB8!6=Qd)I znIHQN-&p3WjzsEqT_}CxILKnhDZl?LRHDAY{LW`9<^5`D>WCh(^%TB|4IZR9|13uc zC6D*y0bNyj-eS;0nV3V>{)aQ>f>$dhlrIe^VF{I#6FMl^y2!pP7=zbW~9W5 z;Mh$J_(lj@C{cTK$km_dR0xZ1JeP8{%g7wCWh}ydnVFm3aZqYHKemXh;xT~MFne5& z;@+cmkDyc>{j(xIlMs23gG}8xe1GNy!bvhQoi=5n=z_d>NntJQOru#L-}bD)3nhlX zz7%IY$i)WR6dBs+|FDZB+Y@ojQrbJYk|wD4WycIw!KE;QG^WI*gaQH-KyredGMfxw zCp8BLBI&8WLGlBUZzK(^{~6XyD>FNiA#m>f8tZ_*Bz_OBOenDsuUhyRn)lMEQq~LG zPLA6SiRi)cmNBAs2!*!Tl!3xBoEfvo1@14t4qfQF9!_2!%MD}<1+P*e!-lAx5E0-} zo13{xV{}!grj%^l-Rs#ToI)F!L0bjLRQOh9M}I3D>M+zr*Z0MehiRgowCDQJUO=$e zfVwe?3Nr%;-E^KDCh|eXXLh(c#=-vN_uIiWNpDIX+-AW5-bPx)l7%S#B zcpRhW4z7>91rqWE65}gXnPf(y7*pQE9YANWm&Z^EFm^oX{Rh8f#Po`{p0`HKWC(#k z6&gBdGB7eW9qQZ9JK%A0p^ZZ_42;mbX9;v2XR!hui6-8IePK@;TOJi@LbIr(0sU1> z3*UL+2wqI^us7aL&0qAypYH_7xB8Vujpt(7DE=pJi!X}ESQ$XP_sD13-roXC^Whlh z>^F>dg_-+`o>^lt^iK!;tzpPEks^Ubab$lvS)T}XD?Zfn4~1JHT+}a${9MzIr)NkI zo`0RHL+l4TP02il11Et#BM}$0Kfw9>18x)l_Rkz8n!rNp>FKolm!?Yq>%?xPGar|` z*JWn}dWjHCjVzrD(#bxVJQ(azXKf|JG0q#NJ+@(?>HM|{99+|;7>>H+8Y|BE}L-$`dNBS3xII7K)|0Gj6Ag_ zVa)it09L&v_pAy~`uwm&ulhJJMDjlhT!mYtkay~0zpSyarnbp2?EMS--_u@XSC(cQ z38OIx?@QXS;?5gc2(|AmNZ6h75P;u^ukNBMq@Nc!b6XNxWcpf=ron}jxI+EU7{ zX}3EV308Q=luSFcvoKSS3DCX#z3C^RG}Mk$+oN}!ur^0?lja$qg3HR$3t7H*%sbl0ZeHrVTMYCCfIfnMnC}ULgK3{ zJunk|ON~VBMMs7KFw$WJH$^a>rDP1fVp{MZsGhvVd37`vZdU~>0;Yfby|6AVDo`zJ zy+$XXvpAc=6tnDD^`$AV9R*an<>Smev9r5Hqm76~iQMl-E;7U4&eK+=8gdP&gxV@5 zAuA(fWTI9+>RuWVclu|IgkyZF)lRHl@Fk8mhPaU$D=AwGz?VXdZF_akH_nQ=G5a(M zA4o#{ve(~MWbV&90V!36sNv4MDL6sEo0;rf2u5qBa8_tDw4eGY>}DI{N%Fk=H1^e+ zGFM1dh$+C-eQiL)m%2Ntu&K!86||NC=`BKP^j-hgzgTJqW{vr-?1~dV*96qJccVs& zN;r|*S+89RP;|UeHq>gXg|NZW6B0J65&VdU89cAYQ;4Ec%Um(q3ou~7Lth(=n=5;u`vY5l84ZNT*lEf>GPHJ=vQoP;Q;cpg zatiBTH85=w3DWKCy0e&Tci;(z@Mwjh<8oiIGT=H27)4;W&COYik&R12GSCxP2I7pC zfZJO#Z)Ah2dT%*sz}v0wf%H_V7H%Qo#uEJ~Zl%cZ4QVX!#76_w5#*3#f<##VQJ`bd*F8_bXy{tH9O&>AJYs z|66pjF#Wgatk&9aJ{Up%!9Ui~Fe-wF9Q%)FEpJEXY%O+>r4cUcP)CG_4G{^0?oMRY z&YWcci;HV^q~8j&_Zc!Rl)byY01ulZvU2$N^v}PZKkuKzx3W&d);6baZO^nhhVDC? zH_?PNG;SPSsyyp;Zl~&Xd$ncD+BCH4wd-|z1N=|{4wF+X`XAk&SFN%}jV*C6Zdi6L zRg7NGv`8t|1B3Tb*vCgR@j2`v>J=l2g+agE%qq00nCG|4P;RnPWR z7r(^IXtO&C=62ce$nW;UII#8y-P+sG7<7!h40NYcIM>`3TDfzrg(-&26d(+Xyiq9y zKkq*Qv@HAQznfTfJSVxlWLOXoBQt1{b%h49?AX zz70D4BRt7nLeY-7cMZ$>a6Pbgv;ETr-vW-2OB$>yD7MMjfFh;Lv)m&V-u0d>&%!n( zO|T)sP79UR|}3LOfgIsD#d|BsNMvwklL%eGi-?&`PV$*pE zv`8mFT31_d-x*OYPwlkJwz_*1&?mqE(5)Bw@w9GiyvFSD1abWd7$EX+7hRefcX3^I zZpGUF@Y|@iYSmLwZcs#U%GnxPmxLvuI2cDse=>#3t*$gm;fS$0e>BWHL@j@zU7+*U zw9Y^_Ek~~A*v3U4HCj?TYKY@3us2RXyUT6p9u@G_#hqkTx9zMbU#V-{PTpDpaJ5EM z7dUWQb$T~eVGb>h4uh~3l}5eQs+7Kjbsf^01qHzX6? 
z_W`9*fg|D8eY>=vBQ7%Hc4MM0G#QI=pOAA-N;GyweJyK}HFY8ucx;k4!H#y44Gp~} zhc~)#oOHvx>DM+^^K1dx8N8eV)DTqQde+r`wLo%csS;{D1qmrxCb$7}j11S{^6EXX z%Xd3cA4gzbSZgh`C%%2lz-l9JM&BVJR6mM=I~U)sWywPBxIyV-5;FXWKmk5hp@^%> z*DHTq5QjXlR`ojX(lxD|1$r3{F3g?oh`q?;(H837=2*(?BlZxH=jRjv%)?+_IG>&i z9y_H^>?Ws)cEj7F7HlabD-J5QzG)KdxYvHQf0^*BNv2SDF~9SykN%R4fa_>b`eg{A zwhs&qfgbdWL4Qa8!(Vu#5qrmmvBVQNfla51QNYi3cn|dn`g`mFJ6)SA6=yz4VK2y{GKV%qsR%Za zH_r}gqj-n+j4?Tcr7{(9(Xve}K|zJ9fb4Uw*ENyE$olC!z+qidZtQzuZqqQ7H?9c$ zv=mj`;mhgs&RPla!DH!}be^DE)m-^VJ0t&8H|&U;Wmd_nF*O+*V1hjp%5JpYMtaS= zb`E_ghn_q6Koq{6)8~il=#mSar89|QZ~3gt)eRZbzA57eb~T>bBSzcZ_wX=;Ev;zj z7SXDm=Ee%>?zC4tuWIH&l{g8iL~c;xhCx14-h?)x9!Q*YFcH|=hW;Ci8Iwaf5dRyg z2r|B$WqxJ&-7wY#WkH+tw~LiTF71KKwI)o*GGD6*zOaA866zScfbd5KN$jyzm0;8+ zYeGD4f(xsJ9%~9LHwmZQ;cF~+(1&RyWjw8(qt#5XZaYW;fJaEbW)jef3o!+nh&$D1 z6J}Xx=>n@CtxJ=~dw@D)W}iKtQZLDrXL|!?P7Uk3oD12=7RvrbZa(!Gk-;jqbjeJ6 zc4zUD&W|B|DS`FE66e$d}L}L$)@Ti8iBXjo(Njo1GWqB*OwBcseiE=_wsCwXSe1bqoR;t{T9`(!tciM>;;i)9_@ zt3t~sxxZ?{f86 z5dXm2jbc?_7MSH;`_Du&A8v(&u6{`v9%LK8@gecr2)TAkvj@N z;$1l8q2T=kP;~L70~e-p!=~eI?oPA~G@DtciTv*>ZGmhJpT8+`78H&aB8|9Rb5Fc*ypd@XwPip;!xRw0G5}5f2Fi9BJpNw|^Izr- z2*-~$Jph;*aFng(a?pl6y7Ni>n4DNig7uQ1N^se}jJk0#biZIeSyJC-vUD01o3ZBm zkBu4xj2ed{TrZ{go=i3f=NsaEWoy@n=anx%7?HBc_wxigScAt9^a#q#I=`7BIja9) z|1izq%n)YB4^NRK8{d~!f&n5*S!9nN|1YEmCBUX0PykjhQfAv{DUr=l5*d9rzUI>p zfgo_;&GcoLPwo^OOf5=w5`5=+JL$;0W@o;m0Kft(|7n#;eO_8ve(9JIH~6aJve80fpIe-It~NvgSik6qxE*)Tht9d&ZP+ z4&8(WID_15)vlw1OHaYD4isuS^zSp!E;UT1D*cS8bVh&?bcOJxlG7X&iV8Ct&UcE& zhOas8r^0aq_~wVq*Aca>1pafv9fg#!lubvkuSE;9Jopmj zh)oIvV<=I65`A1*N@FbBM9|nc5Lb*B$G%7dO!<Pf8j^$DvV zgH`6*rk58>G|*_g#wf6)|~#*1+sv0u8f{u(PTbQdnCMK`rI^>(#(V9WUr zK&`VQK+c01^2QyEMvnv17sm3|A4ocouS!NW7{6<1eV&Puu3iErpDgbfMkI_SpHIU< zt~fr~7D6**8q_q6qb((RD=Kp;mMm}|ETdIZ0t%)eDMG{VDn@~Hv6nC!j9myP*&i&5 zMdK4Q$`uMZ@BtjB2}GXwwpu(?0Y-oYK;a{BgJvd>BfAxfN4otZo1d71LYKfGENKBv z?@H8$d}`Syj86Z_lzf|rAS)IWOF-boLq&%$NKJsopGX-)=VF+RNHI%K&;QxQwFePK3`6k|2j!jQ>agfQM;7oaP}WwtO4>GHcuy8_;N>v_WX=u_UNY0?{*! z!9{~C$Wwv31I+@_?NF+Sq@b!6$?g9w3B!w6O1o7s34(UPn? 
z{&^lM&BbDcs;I;eiL8>*#NLC^`~;<`PxqBA1f`iESL~19RVtT7T2u%88T&ay0ikh& zR?UP#7e)Q^6EO(pCt}J^MA+V$LN6_9r8(#yqpw3Hr7)~;6*V@}fe|&chV&1c=Hj4| zS0SXIoTCddIify8@YVARC+xock&Pc9t?1JaLIi^jjOzd+Xz7j}LnFu`Z`uvqmFO$F zhG3t_+eD$W-4O>gBtG7b9T20}qm(5|X#(tH?-W(ZZ-0W`{set9WVqbc+dpKC0*jN> z3xexJ=?nfj{Qob?E^(Ev4Klo^YwBzeWDqWJTnBD)h9ZO!$At$uOS9k2Plm;yjOpb+ zfwtyvO@!Ef*@+{8fe9IdD+-#r*-OuY5&gU#L;yEvmZllT#h)hyj_&}gS-b!`qbmSM z0^P2xCjAlyf$-ZvQ?5nf5g7Ama)PM`ef z0U!}V;RQY$!8V8|zV}NaPqJw7-y~Fk0@sr$AOZodU~|~0Hkj%;4lyOvc%O1+QBXqz zo_Fb&FovNQ0X?8L9a zTSA9G;a8GZ(2(#6OChGoh!qDH2LYTT^^_#o=mWo*X3XQb(G@Iq1b#Q1eR)wKp@C5t zLrYUIQ9M17*;^%|m6H>kRZ*`M~y{J@jXJF|i*B-X$S@z}sr`TlruE&w5unyn}3cv_L z>J2WZ9?#l72kDk!Gp9n4na@PAY~w-DVwuY{vb?Rrw=ygV>v~Yg2EC{$0B<4S03r6X zP2RHuV}?8Wo5BVWtLs(x&zmLhTmO=K`d9%s>9mLmEu<}${7~ambrs`5Ro6u7)=4i- zC}U;^$%e$~|1our!I?zu)($7OZQHhO+nU&XV%wV7$;37$wr$(im+yVgsX9OV&#tcO zuHNh3z1F(K@I*wy&n?&lu>dj~tt)yWGP=*TNPF#ZG0fF$B;vb8wtvIp%~Cyb1p*pI z>o50BbN=-4KwTTUhJ`60YcMjRhH%D_33Zsdy4oZhMhFhi?0|Mv1M|b9;Y$OEXOLKf z1#4_LH!Oz`j=%J%1KgKpkWcu22Mb=C6vDck=Be+JI)HzsH`xY!aRSzPuPk_6|AgCv z5p_S;P)R+1VK%&l$66fDpuf5KJMJN;;W4s8PDT_}R0jkw8dYTVD|7sD$CyQ)`&lvm z>Sx8%kFnr?qnn9HRAauc>*MG5on)TQ!zZgMv#x+C{!0PhOpA4-5cp*B0vsWgk*zw) zvJu@-XMAo)8k9&d0Kr)u7^EvcG^D6X#k=?nVxoJMP~UPvduYPiiCgObz47YEVQD6i zKRh6|+eS=1IsHdo^&eF00Ioyx^vUO_hT+lUXtHWT6H6#y{HRfZ$SXTZR|-sIAOB%w zu>2u);3*XgCq&j@pg1-l)1{VO+~(DC$m=RVK3Tu_3MA6jusGW8nkzS`N)-i5v5`LS z<2YRcAtW?OLWR%Q8`PRz-=jm*wprGP0*kKw-TaF==cd5bSXh|Z?_sS14-cMd7jiO)op_i1z!7P2Jz zzUrpUe<3dsS-I=fk|CFxn;(t8_081+(=2Z0fICHOPmfQZHhrjE^7Fo{!z_y%#W_ z(ru;Ha6D^h_$6LrAhc*ayCdw6pS!z1zFz70JwcbUW5vf0(jddD8vCVV!e2g4a|jr8 z)Ae}I+iq{Co~;tLTv)9j`QQ_aD3msaY*Yz}Hf{eD*UTN*>W00{xGGu+WL zc`QSAa(sh^6b&NeXVHIof;7c%S9%K#+q$t{bQzHq8vQK~8r{ov?o8&^&mg1?$~;F2 zKFcP2_%;d*r|k*hdopj{`pXV?KCfY#b~3#FTjSFMo2sL;t$23*c6-`hp9qG;UXRU> z{c~GRyXHk|z5_Cy+_ylIn#2-i36{=Xu&UF2;u}(X007EPk$LbRrJB|h^aC(qo*ePZ^g z;-8WrFAjsvzb|L&F!(w4H4OT`ie6-_Dr;klkC)PrQ`xgLHmph9`g#_X1jOTe<4uTj zq5zwF^FN2VZ8K^)devwcO77*R50H?v_&~&CI>Gk<%D*7Ocsp`yi*oO8PJjPK*1tKZ z90k{wyT3C7T0d^zrfI;lfN5-y8bqnqaw!MNtviltPnS5XV6s4;DiMf(rD_>RANy^7 z>qwLT-ud#%LaBB%(Jj}?Ce4G0d*I(*ltDGyTSTn2J7*rp*%{L@w$` zrf-)37|H=@b9_@)@PHg3P)TFooa=buA4y+Y@dfVr3vcNMOtr=$7FzMtl(OL>E~BFs zXM~m>GtjI}=8GLxx&AIJ=r4`nKg_BwqXDrdPxt65LdvI;L7sysxFfO|Xv~uCmKt};s0}nzcV3hl&Rel%ZzLzVb(fCmP=s-#r@TpFz69Qc=rWN3L+VQCBFUe_Urqpbt%fL zAv;480bP6Pl$j>7+*EMAxONwmL^K-6)Ha*deIhSfxl_pS)b8K07Bma&ZDP6QGW=N8$e! 
z3JWUFi|1Dq=!s)OOq_}+8(l_1E3?bwz3g<%-XaFZOV2o(f=5mw3A8PWp-kyT25}W5 z+*oDU^7%d`@%5tkS8y#f!5La*(#bmeYvP>M8Hylxc)|e)AUX4SzX&m|0VPufXw>I~ zgqh%Gbsi2@?`r7!bb{-|c8^w|Njfr_PucNPOim#dke=ZQh7&^k_r=9swq__;P|@T-A~~x-I)T5M zDp%^p2f>*b1R<^|f?pk&2-L0~L~l-|L7(ZxOcC~rPm>MzEV!+W-qg6T-jat^&H??} z9rC^;0>&Y$UYu-0Wv$nq3YQiiT4VO}3Hki*ju3UmOYWX5CtK^OF=iMnVCNS>NM2id zZ3eoN`3FoRD?}WgE;?G>W`Aeg~DX-F>@4Es3 z&sB4H*=7a5c`F{Xr(?7=0eGK>lf^2tkM8zcUO<={TXvbo*FpT5@eYBGk{>n<>WoDn z$DpLLwcU*qsBraA1G-hEShm`1@~XtNEb3D+ig8MuQEE;T!n*V` zx0Mv-kR$9tb>DUoXtgwK4V~rTP*IzRhk)04rr&pAl@bJeqXomNfIk6+{OEqZs|Bhq z>kB-(r;s2bQjFFPhDsN69i>%B>4!hKbZ1llh%H9Gy0g;bxbw|B$5n%3b2!_GBoj6b z0Z+Uh&4HKc0X(qH5WS^2ZF0G5c5~`(frRhQm+NUn0xd3`k3zD``4aEC6-(D|3S$x) zi>R0pBB}57`qITsKzz&Ba=*9!RH|Q3$XZhzK@g%sgdN7feTF)KxqZyw_(>6;lp>G@ zka78br+8|`$O-)nQn=DN!t`~XxvPQD6JVY*$|eo0b$_SJQnLoaM#-M+;YJ)L(!sL6 z$7z1GA{z5%WF|fd+4=HijX4f%sG;E+@YpU<+bySUca5742+w3mHeXE>G%nXm9Kpf*d7Tfk7|qg zt%R zo#e>dY%#Zrt>T4M>zxX&}hr^k1)G2OzGH&ioPxr^z+grJ_*sGu^6A;>sN{}=`f{YNG^;lF@wL} z*(LOiF*2`K;7B*p!GIN0UM9|H6jE(* zt2GGGF!u}MIE@>>K;l?ukdcS*gAoclcaMPmw^EVhYm5{lzQ^j>d&!ppKsVTqUE)Uo zmYzKB@(>J^q|Kr22dSL|=C%^qYR^WI`mlN$!mAg;qQQk@NIFwfQm!u%obFf{BCzP# z-#bB0M<6}hMMat}#`5Omj>8)`f&c}vy|?Ea>@Kw3RZoG@lSA@<*1B=mkvDfVJEadV zw8?fHRvGL)6hK9swZV6Yc789*kEuNXSNxKUa4!#%>@p>=6jw6~v&(43>8qgB4qViE8SZ?LpOhM07S#$y$FchTRjhMMn4g z`R3E@uZBF)lJ%B|v3zhGq&ncSSUP#J)gvqRSzFctK~b+Vs-a2C*5)<5VgS#|$^BQ% z`9_1Gfikf({uf7AtZ{C?&W`xK)zdEtA^Xc%yhxOf`$6Q4ETE<6^33I=7CwY{zD`l= zPLvA)_I=xZnB-6*nf!$RRuVtXeR!Mi1fd$@Yv&@wewZVTAyz)fq1S}G6&51x9{8AF z#y{G0L=+Q&-Xch#%Y$=ISJGbjl$~b0pdCLpP}6JPb+!UUy%KS>1Qq)P}X&{Tg~5)uZMnV#cC zkWtdKSUMyAdnr4m$5B{N^zfBBz}R_)m-bn8uHZKy7;5N}EmOSf>*Ai0K{aH8uG4U& zI4$L}K;M8NT1UKJ0WI#{MjgwVD4V+kG6))+A_8;NoAw^ zoB7s0n)-&ewUR=>f&WMR3+PP#tQ2=LjW)iFCkjQ!-wBZ}A4u#>Y`jvyuyrteGK2`oY1M4X`X6dmXvOywQfk`2zC!usbBYIZV z5`$C`ZKTLB+jJyygEM4X#~>nfml-e?0NdZXRHMX{I49KV>hGEacRC|n%h~5#G6rnv z*Aa_!M*Q@M4}TO7Jx*H*-M~=C*`6y!@8vr=5=_X(W0IVOSxkH@U*qF609~HdwuA5| zvJ82#&=&lVqGyx2PAXXiN&B=Jv0EH77&6v_?Abwi7ovCB*o###n5=H*$0XTr55q$U zF93y-+3FS-No0g*PuYkfaeV$+b>sBREy(NdtG~iqS7pID3cG_~1vgi{wm8-bAEB0U zX#KpEffv=xu#Kkf_->|V{JJvebG!$hz@*r~ z(cd_QkXFZSQO?$sW%*OG9}r#`P6buVIa8l`3YfmS+=Xm6JWyy|_ulq>TGbw~n&5n-S`*pPIeO$inFAi5ew+M81w0+&&Zf1RV6=AuLBP7fA}9vCe%t$kc9+b-$5_g8d7!GGZne3K|lt-Jai30jyi68&c?I0mW~^!v8IxRc9^M#oRxp&4zk zQ2`!)5`$VpQHE=7Eg~I*)JRIUr#f@P)$Lx}DjnO&-Qdl&K@*AtIG|Z^u08onrEN<} ztBaR?2#a#Ef>X0~J4(6%^__`N5G(rV*f+E8k7Jr=`L?&-Q_T_P(hxfNUJQ1QLDMP> z(_lee@^Irvjuox*9O)biyd7aNZUPlWONk*YT%-VM;T6Ih*6B;ohuTa+0qw}PM4gpUf z=n9)PlA6Sk$C_fbw1+5JOOw{#W#n9;K{e*gggZ8Ww4s19?&Sac;ePzg-%?khw?$Bs zQNi7BTgvONxxeoa+05eUjb^I>waKSBwOq`o=Sbxi%TzQ`aZ~HG#zUr2Nw}HEWm;No zxBc+(e+=d)ThS{OI@bVb{e*3$UIBUiI}SA;u`v+cvfvK3e$WN3>%hFo{_bu4^F z5mPMaSF~JtYaO~65a%)3O&9A^lVElZ50qv|mun?|X&yu}Ivi|b@)LM&bODNtF_fvy zu}qk)VrF|MqRj53iuRusu2$ajvp{!3=eYWN<1uWc68rKS_P^CyOOC`$E826Jf0tlQ zFBsN!Jk9)_7+jZW&M1-$HwcW=V^HUcgNGY}nx6o-cHWEcH1x#AJy%H4T80bEsXVzP zGZ%`gJS>s`u|L(tZyUioo&j71g{UTyVJZ(#Kj*|B<^AV#n z!GF34QYD*QS#%MaQR9Rp#=8B=CSE851y&S0Y_9(JRF(8CCAkb#pAZVClnV9lGrBGngRR)0-|z5h0)lmQl~g;3^QXh)(s#hwarB=f_w=^ zN!(Dk=&zRbNAffbsMIlR&02d(eaxjGk>{=AB>Lq<>pueR*hG~DA@a#Ge3b3zYf71B zV(|5Gbu>AADB#zb6PjkoTa@F=W90M41`m_k*^Bd06vhD*>>Ar?s)4VON_tI2Vub;Ch}XT z6L2F0$69)7)Uz;rmqG`Ns|nYA?A?QwiG48)@RpPp{};NBNxm=z^)|cKyvqNo2Nc?f z1Xf3+tklAcA9=~o2=`Zi-u)l+C@P*IxAODS@8#sV8(8VaJr}^yAAus8*}t6s2x?K@ z;1D)&gQ#N&a8Zv#nIQ97Yhk2!Idhw45!(b}^Dx>pLqU&mp^~0VsGnfDI%GuwLLtex z#(%&927rGdJR^a{xw#3gP9gF=emPE2atclW7bJ)&@;5`SOyX)BPf@m-a!X;YkMQtl z9ie4J)IxM1RR{u@#nDQ7Y_Ll&cW)TvIqi3e)+%J*PzPKb#$t_`QfL`X`9THg>J9Kc 
zkw{g$imfDbetScmPcepaP8B$TjuJ4VL09d+3mUIvIR+0#>k5nK_-Tpd zA^k!OGRCzybMP)-%wWY8Zj=NGqSLr69c9L?Pa`F`*Ubcow!o;K-I+T|S@*mD-C8HMjLiFNX6aI`9+EMir8Oo)(#-8+ubZ*a{?tQA@GN zJ1e?)Z>I(frXis8;FSqGP72IJ`~mK9JO<$T%4nyZWkeo?J&&I+*OJa3N22@*n3I-q z&Q3Ggx1tZ=_rmTTw#3BpZ^v2huv_PXA9ReI(-e(kJ(W60YY(V^J`zZ5|E{SI`OOT~ z-d&fO+B6!xqPQ{e@6+bqo6z=~NromO|GEUuCN%ou^fP7x2!h$U8Genc( zu8F_-db~Nl2^U4v8PT=nuJFl)MU9{bMJpNA&7IN|x_{+v2HEH~xm`XycPNDIGOEk= z@<%ZTm=ulzXw~x<5+!`~vH_@X&xJ%uH7xncc(nbewDM-PFY`y&`RP>KMS=I-7d>Lk znbgHWk1pgO%<%z#otj?`=mgLY`I2RKW-2lw^Mdd*Mt0n=O9O0wYqclZc9hgEmbmEZ zEc=F#>nYi`4)Ua3L~*}k(fbz2rAzWSkDhx!v;N`lUzyCwzFNk)Pr`YGjraqe&=gcU5PLGCLnX=tf1SK3MnPJfU7X=%=| zMlwX>Zp%zaIid#mlQf}i)hdN@q02!8XelN9VSfm;kj3^|sG?TvbM z>%J{!ML@Bj%@l-!cz+#WgFDs9W#< z$gI2C>b)+ZHDwyM>7$!jg&ipbIsWvy)^(@I<76@TJlm@fKZi1=htslZ`9wc#qUXuZ zalWjqd_H}c*Y#IG(zm)gdQU#018+Nb{*AIvP_!W?lLaHfgK$@g&SAsvZKht5l*io$ zwDRG}Z@RP&kq(eBb!MMhFwi#m|CyeCdPVCbhP)yyDG7XD-C zs$^ey@t518R^cF3IzaSsY4_EfP$U|OO(mWW%~K!Qa79)1qv>OH)p!5S@3Qrc6?|?% zE+cM_+yq$2tv7}^J48zY$;-S984cPIWD#1pTf0!0i#LCv@(+8TT8PVL-jP}nz^z&+ z&7>AY--=ej5pA0okLm@*!ttpfIP6))fWJWuS8SDProT)vK%a4Ljl|aqsrFilInb@& zsD{kI|K;&$kaIj3Y%CpAJ56k0UE=ksHy|>o_`JF3*mE3CP?XkRF)?$!#C5|_g^{y) z_wB6ha-$kA#gX!y6zgcPEG`)mpuQY+=nW7qhc|O^QrWVLnmelAu!lL%FP7Bp6v*=s zUIyi@`=u>;LnDHSb6s7ImA$dM1;iIcfVHvStmzD+vqP|rw9Rx-_VTY8bIP){0nGqI z)#e!=qZYH;F*9#tp@FH*I?}F#BpB{K7W%^~YO~v=o0l{6u8%3%c^z&Epf_LB(d|gI ztdwXVcjC$Vq69<`C$`ELeHh#A%H{4LIhME1ZP#jncl0e`zfeCLi;RzMZ$}j+fv~${ z3KrPj-x`II3ESfrYd5}l2r~V4W$rlT3a>y}^;Y0avo^Du*M00E>hTFVXB8g}j?;1l z@j?RO?^rc`qs9b~kqCbQ=tr$621MlNgB5i$__Ca`_?%W$dAARHZH2#cm~-{Q9os_5 zW)_s;cP(FiZ7#5MYeoQ~E zZci@stG*H#>0`JU2)9BLP3j8LCy867VkeZ8b0Sr#_J1bf53dLTY9`_ysYk(bkMSd6d<+!fR)YO-t;EkKZ25QKq?dK?Y!G>4sJ5`230MH4aw#z^)jNKlI#m>=g#`OW zg5;_CEbIyju$^$ThtqX!LV3P?A#T7t*eI!}4fexBuqqs&RT-v$_DmX**%B1rU$P6! zdp+jnUrz*6Q4`|Gf%L1+uvq9Ga`&DxL*F_idNK5VNcOxs=Tvs2hAL0K_iH~S9!%n>*sPyo^t&sk1c#A2YPVlX5I&I!c25N1(Y z*yus#4aX_?%WSOJ(jqM0^rn}noa88NUE~vmo}h)Sa*j6_bfSaBng;v)AkExS)ox3l z#g_|O0hs*_ImPR-)!>vFs*qIMj4Jl^ ztWMt-{>pv-K#r};z`mXm+Ru41>3pbL#~wL-=_zUsAZ1~bgN^ILSk1wWxx~$wBRM{| zB)jEnhQ_jo2~nFcvcEGZH~e@VClKI^gBNBshQ3lQfERl%mwmQ_aQ>b!dTL2tk{unS zB_VdhJ?@P)iBeBDE&JIi-_Ywak~JZ;^69s=&`gvRC&8hwla%~B_WY^KUan!Q$PvAB zZVN>oaA$bqaReR5%S9NG$u_8F=XAn*plY5jzZJ_Hq`+b{JnNQnNY_^jjw8u2wYM{~ z|Kyl@1Ld#6V_z9@x+E`g1EQ%JHvv4a<$IB3v5c3_1Mi?zzwaYe#`@ydqV?uyvW~Q= zO6ic~D+X*#h(vC7o&|0(Xy8^n`G`x2<1+lo$ll*I7&&&()I`0(%Sx}C7ufls%#?a=Bqz20S{ z5w zc3irU<#NHboI7HoD|5Skx70y89^Lm6d9F9R4uW@XE%O5bROPGVSk9E*lZ^og~2ic32#`;<^$!vvfGN#@#Zw) zR;>8!9p#`Zox>9p(4!p&1P-ouRNNNCobTETV}%Tku+;?SyOPom-z2bx!7&8`h#D6T z%Gg`gD`)U|2tx^P;0;e+GAxDne3Q%r3!Eob4z7d!BclVtmczR}O*2hosJl4QuLV^Y zc|-@k5D2At#yoku_D(TMMD20rW!lBsES_BmzvfTeZfKg_zUCE>07;pjCwrF|SjM=B zvva&h=m|E2B0BAEG1F~7G8$!Xrm#;KNk}g|;*z4&-d2DD8<|bdh zlk!2(OJBL7uWLO-G1Tb)p&p%NSV)7n`tN~&O&q0rx;=#y15{p-!K1Q3|Bbp7U6Uzc zFPJma28JH8YHUMMF)Kk7jNq$dy%;LEMqJIjc$zf-Lb}FounC4M)nq&dR45ujut-sZ z-;Lo&B?j(Q1?WYcFb9l=S0K+M)5OQSL`p&HPzX;DV-H2({jrfE^n6%u&xNT_d;q%= z4Bji3ax!hLS4|w?91Z1bm<@9#p&%%rhZ5ZmI_VMUHmJXlRI|q(dS18vWUKPVc>1ah z300x_V=qw79F@pS?OL(}V3>A@bM|guLv_T&7_>$a%cmyUy1PfX$}8BmXA=mDRPJN2 zR7wTUb7D2bKlv?i&;nOYFOq^<8?CeM8cEp)poj}9FB3~R`stlaT5TJW`9bj)cb!=aQ0o0CR)X&G1Ru<>qu4@Sb};l8{xVYLp|! 
zk&lre=ZL$K={3TW(>F^(dm!cH_R{jT4YE5AF>Ejuo{ z@kzjQ@`2-i&aBp_a5D=#g+Tr1k#W{hNw|Vz;I<>~Iad=h1R@WFtqEJIFxbyguNSHH z&1I#guA~Lj5W!{}00uE`64Ul4Mt8D=E&yig?d}e_*$&#)cdfp~)yesMCy#f`!XBVh zr`?`REF0;fzDEI?^V^$$E@bTk_IS^7<7cmwlxSJEo102P$S}Yxe`LbXcmXlOtfZw6Fv7g2zAxiZQtphuO-OYK zLlMY9<#-?sU=|i;s1BF`|N3jQgQLg??>3yP-yXpkV)jPOok*H`lGlCl^VyOZnQXER zEo8B<1HR0atz~L;eH(l=gLJ)OukjI7eCs}>OlyhiJfh+lI8Q9r+v%bft2Chk&~-lmmN}6$Y8dP*KFixq3ahZRQZk82 zr;gY+GJz|F6;BN79^hjJOSt1|^chEsBh`=oU00;@^k!Rk-&e7MV87@MF_;`UT7Y;# zz%bci54*?iz^1_xV)RE6w|A@L%n$aiXXu-b5aAdebc2TXvB7-`I@5KHt+?BNzY$$2 zt!N1Wjy?QLMAk|)ASH*!$8pS?vT9J2Kh-z_m>E<2;DnXMBO7z@Z+MomJivMUm-<8QZ$dHH z`6Y(VcO+{mSm#thF4C7*rS?E!u2CK<;+alpiU@l&rBH>HY;Iekvr(40iiP_o^IM?D zS!0d@E7byRWic$>X5_774NLd@ zd74Z@$-s#z$LxQ0&g&}Y+4OVtgvhCyxA}KSJf(oIDbOOytkcs_ApXu@EZeZZ9yH!l zdfr}!+z&f0vnmm!tBbCS{irq8hf`!l7$AexaE;0})_oquRGWEL?7E=(RUhDz6BxL4 zGQ99e3mBNMR$Go-04KaxTllnvt25215X^?T3W`#qDm<->WYeoHa3BhwMxVn^_@o<> zhhj+MiVNp4(i`-Oa=r;!TZ|rzcp`CfS*zJX1B&jH_}=1*Zo%>;Y-Uz6;teI-aK1{{ zrDR9zVN}=lxwc2*A-ANz)aBaj*9rtwJ>0;qDpag0Z(a7I@$O5GUt!*}HG%@9VN{N* z6L-DB`uz^c)QF!fVd&D*X23`QhIDs(?gm4vick2U>msv`wj4`HWf7F34*6Y_4%v3nfB|~5w4tg>LW z*_ptax}Yzu&(U!p<)H@>H&7Oz!oHW#eu2sXTE|=Y!y4`TnoHi``V^Cua=N~kaqdu$ zo)&NHhaATb^QLQ!Hs2))KV~ zPHNh-Z0!0EQ-5KnD~2&&x=&1Wkv~epKHm6 zNt0AE#D_s&P7VWF&HG^~z$on>KsWZIzVCF8>^3ERYz78}t@S z&fjfV%A&)>W{!;`B0-aAyI0yQCu*#9R&3HOyb+`FWDc{<^{nDPBA`4}(NGVZ751P~ zGD3sk!K@xliXjnMUdo1VLk|!tm4Nz;pBm6g)Z*aGfYQWz>w+5%Q0c2z2FW*j=pc9{ z@;Lx2mYN{{M>-DE*j{@eusV%kdT>7DZPcoSAOtkmVb=W-dc^2kV1UCQCE^$}G@suJ zk=0s}b>v{$PI4cMc9F35dAlvrs9-_)8eitM|{l=kQkBvEZMT}ZYj(SQIx*#*+ zud1;I!>9SOt#!2-6s@mA7p&+{ zkI|3EmTFpG;sa(x5{~&yaxv!KY&)DD{Fz-OIPA?~cs>>e*lLtGXeG!$Ctk?9YF!mV zbXK7}b!a@m~MzF`x82(dnr);ec0{C zKeFdP0R-}d+6b@@lRU9)#x?78F4^cXPX}J#!e(oekqGqOU~Azs@0LB4uaG$J0`ao&R z*U8>u)HcKz2IwcT67Y95h+#A*n4UQ9h2LX!uj4~&u>8y!wt*MrlMT7HzL!Ki`A_Cb z?gau{{e%GbTe{Djr4{(2-!YD#n5bD>Co_kD5$YSOh>K36f(fSv=C~4L%lA>}EG3vj z7!^F3sILj!6Ll*qM_$-{vh2lp?(%8!8(5Clru=^bv`nlCh~x-d9IVg`vSxM`E|!GM zOkDrvJWXoL*yBhbcb{uKFmV-?5~wY1!VvkJ4!HS)L6S1mcLX9dvevaGN(K;{@PvIl zeO5?`6>v7bVw$)TuFR|O>HP>SzWu`IAuIx4Ryf-^6OjOv$c@*$F$Y zeG}c$fWfI?dMz zq<_Lt>n2^v9{yA_C1r6cTg(xOMru-zL`V}IUL5I&NA573}j!oNW@ zN>)r%nV$%&B=5O+8CWnWF;PopR*gj4!9|2(&vcn*llWty3aezm-Gj=kxkM83=g?5J z0MN~d7RD9KksM47b5e|TsEb%GGWcn#jF8n^q|usY!BpX@l}Om6)zm>Z$pv#{q0P$u z=u)s96>GS#5H%Nlthk!1v{eOWRgasS0EvNpE3?gkqz)O1BMYrlT?LNBNJd<>p*?WebD>D>p-~(C;C`1 zxj+jkW$}Pr{VBxpNy^=mr2e^9r~hNbg$+d0O{PfahLWX2WHLhd(=S)WSC%IO#f^Ls z2{^>9kkvv$b5TWo+2KG#+7y&^8zT=0yFgn9iyIAiq{pRB;t$@D(h~MZ>{Xu92P{Ih zn#u+y7&`&aBccRxb=>3^Tp({$v9d}B zwA>f=l;_EWFSkDXAg{;PJE;Jlne=4Go0Z+-?rc!w+|!6L(0BWi#5E6vsVwH}?jI&w zi`^fDMfPz!+&&=Vd~|HI|Bvb7Fx|w0E5rxAz z9@iH_gB?5phs-jd*uS55v!3dG8`miDT=&b&?8&W@FPS4!S(7h0vSh{Hx3ajGEd5xJ zEDfS~*BA>ra(VL8t^wf3R>NoR-yRhFS8yQxNYCIxy(6LI^u_mugAR92RE|@GbVP&hyN16;WDkDEwifn4fr+Y_ z2fD+c=&1;xX!kr|={P5M1)PL(%*(;zwI}?&?3#K^E85iRrK|)I?$LpI;e;1$2?w3M z#(-e-Iq(i&QEvddOQ6%vCaqJwZ)>4{mEYzekk@&4x*5KiD)uetMUh8LXhav;tCdn| z^r%z=Bd(z9L?b~%N@%%FRXelap=&J+0lyBTy~7)!_9mp+Afb7R1ke%$&?f5uC9C6_ zf~%vAjqucOW!r}m&_i&1_Ccd1h;>DR#8)k~O1h~}k|)qYPd`R~u&3)c7ouqGcB_uzE7m;?MDSe76UzH)RUdFmpccCkfY6ZMAeZj0@H{zwvB0GsQHIUoUp(#6p z8((k)uTVcWjNfl00Dfh-Nl@Igh?KfOQf5I@dV>GS#+XeXHzs_}F!=Z+e84Wtpc^X% z>NUT_fZ1brgM2|Sh+OE2sR0PsQ6x>5&i5URImZjD*zWKErXJ}zOzm09G(thQcb4trvx>|<_I7P2E`+1w{S@?qT5Eij_-J`e$D0} z&;)P6+Sv^iMkVzDAUJ|%Z$7LY_BCXQ*YQ6@R=^PF_{z3s+)O z8@Wr+0=lQ0nZYbj-XQ6X^?XSvo47GrzJ7n@SCXEmA)Hjg$!wJPXcv6BQ5egFEOK+4 zP&}k=7&)$PQb?~I1v{p^VLhH6sBf#Cz)Zxrh{Wc41$84{&f_SO5#M8SX1Mv2w;Lvz zlMSO6?}_6RRpRF}IP{F_1d+{O+9homp{!1k17zmxf|lW}=Pp7@(%6f|RU{j!*eqDz 
zJyrHj3_Vukwa48&&&7_vC(Oyy)Cc>r1U4DQWul427dSd;+ zK+-gQ(e*NFqrvfFRQEwFq}3|<5`tXz;u@G-5?bnC&(D|J!Y(X%TW=bhY`1`5A%bq}dN_}!4yynI@6RUd z3Ycdj@8Gj~Zjj>gF=5e9j9;Sd9Dv7-f%>1b2aq2m<-jtS5Fl=y5V7l6<0ZTar>4#k z*P!FfZk?|&Op>pWuNJ;eiBVG!`*1tWXoNk%CzE444x(Tpv}!_M!>m5nt+F&N)4Y+k zy}oeuDtO#_3$8-kuBX(4gCA(8KS?!;yizv0M~ ziP$f#k$tI@9$?SN3(-I}1myxTe$4dU@Q&X4*Gi8S&hFTnQT@#k`ZC%sV!`s05|Oex z422-k0^X3PzzJI#yyYa6YH#xnB+BndN4)y7Px+1mK{q&ri33$L40mhdHeI6LzTZ0E zUs`84sWugfYf@TPSR_KBT!~xuj*Btl@2;KDiU(h=+<1 zt_|?KH!n~$E{EfgdrUrS&_#DJIvXCepTNZC0}n?Km^|As7U3`U)K`x8r5vKcE}}e8i0zAdE#w{}Qq~m^7e| zctZFNypG!E;lVia3YVTz^8@T7%j|_9>GGPg08Yj9xBgA8ty<;u!1U=L9 zFt#kxQPoxzFOSJ;LNcj;wdPUvj786%s!#9j9ucPABq#s#7E!XX5Hb?l8(Ty3^8T+1 zX8JMXa54Vx6*D0dI~yBE+AJRg8YssP+(a<}k`|!5{==uPEo>{KQk63Hu4ZEz=m^Q+tOqoV`iz`$GsVDW@}fE-cWZJhlE&{$weiD>WiY@hS zuW7=Qv;B($U|?XKFc(#m{_ynI^mgz^f`~hOJc4rK{|{3@tiK+hX0a zZVrHziS@tX{_Xu&AZv%eolVTl9POP<9K5U@ECCkQc0hoVq&$3&LOZiCcq!X77FXV*dBx+B!ITIQaYrSy(%mTl_`C+|7wu!@=6w4Jaf2Z<}`! z!XKF>&=tVR!otGM#RC931Av}pR?L4TQ1^0w0{*RJ{Y(7L!OzFZ(FtJjP6Oy?Z2^4$ zLGW=gaR&liL2f`lpMNU;7a_2+0?e(=Tmhy)OKS&&KhfXCK#PCj_wxr?djj-W-p7v> z!1CAUzfT756K3w{VCVJ6{O==X))ti#Q&Ok@H|75-MMWJw0X~eJTmVKkP8I+wD;EcU zfcyQy?|h)Id!PPOvirYhfac!|LJRocSc;DClM4jU{26q8 z7ETtk_b=A}&*T2L%m2SA|0~M>>!APNiX`3a?Eco%{LA3~M{i(ms_nG`#1$}==$ygcqiuvuy6z+{B@#SoB-yJe~JEog}4CB zqJI!KfLZJh;sr2E{13uqv-xY@A*vrARYj-=^yl--R!@J^Id1=X!m{) z|B?K49?a(dfbX1v|A6lWTKsW;e=pF&+Wn87^Dnid8|WVn?AuLB(^F`KCGZxMVP{ zj6`sBAI?2d=WcFiKjclmk;o>}BlsQ1u^0L1Zm`8a%zLj@sI$3$bmLLr>EaSW-h3FF zrt%Tp|168M=u`47c3p5Wmpdw9csaB`XNHo6~>qA?`jt)##%0O-jJ!V~yLJ zuqVD1#MtW0@#lJfet=;vH<^}_hfcKs`$fL2sDwQ;u9oj7p*rFc*HGxAH}ICv^zDw( z2GSFX@>nD#D&8W37jDv##c$E1XR(*wdNvoATFCuPDbDyJWg zIZlA8G175}2D8yj8BPI4Q{#lf*+IW~PILJj5BQoj@ZAc5Ht~rL2=COBN&(cr8CD!}T~@ZK?!jPJF@cwsEhTrVTqYaH(}x zC3~5w9ttCWfBpnK?IvnYeRzS^l?zaoK2HQmW(nL$$QIN+W%3hjOfxC^AC(4b&ek}? 
z$;;9kO~cvDHf$ldzUsKkEftV_2kN`QWyqhaIL8ZUCdqtU2*W>o;5F%hHTp$Obtoy` z%qvB%*h?KH~qVOv6F3a*9qJjx;Px5muisdV>@i}9vQ^E!XkAi0=J10}zuQDe8zx$N#E z&rAh>shji8iJEK_#-Igui>;l!-QfpllfpOVZCd@qi9BpdvcZ&3xpuMf+at#SlT@k)MpMP_6<_6h zHqk@N)dr6%Q`>IInp{N0AjOJe3mg7;b7As-Pj+PVT*u`^y_OKW2|f=u=yVUiPEXSC zrV%k1)Kq?>uzhPUc&ebHE^(;D(>Ja&P>Bf*;oC>qT5Ij)vcq#=G35=nH|RLOU^rr4 zE-BL2q2R*TqYu)Mh$bIwiB-r~I;KIMtYZ>?-juK2D#O*kq1zO>_hzRq{zf&G%JnUO z^qehi6(+ii5}KYS^*Ylw(6|m)eEpTVZWtofmgd7qQhb7KI+{~8imUOh;elx1V+8Kj ze67B8kpH0$hS6>5kEDiIEVw5OGsJSsG>7}6sYRB8_-+PKfCI#KSAk(hyr{$Qx%F^ILYdR z={l=XyNHgL4+UysCsM`ZW9u2(<*zJk=u;ApM?1fF{Q$o`${Wcc^JMA0vm%EqUw6|J zS@zr9Um{xwK^QTYNtB?2X8_oA?HcS3xHI**dbYx0mBODT*-G^2*cDWN^D~csU}6Ny zBJR}>-H3fkg$?*7NKz`djom1YY~xTZL*|949YKJ!D&FLKPt}nh*>__rv~Qi$(Ythv z28M~@SrdC?+UnLTOzsmK+3J$x-JSZp^U(}BpdDF?cowN(P{7QtbntAl ziP&3BOuE?|vQ|E>gbOO?^O);@KsSj##HzuR_LpHh%Q?u=@3u(>IAWxP#^C+&upI%^{Y|v!AcVH9rkVKqJgJd|YKWo&NL~LdvAr~e7!4*UWQiED z_D%LYXukz3pu^|>;w=e`#0t`fAhBmX&#`gODu-S%XYxjJb?Y`Jv}@~sd#y$03+o-a z5Mepv<&oU{ezc_*f;YbdOa6(j#5&Z9WRb$}u|f;d)qxc~M^6uDr7QLhyko2xkBkv_ zJ71o#O7+l^Aiyh529W!Z_FQea5#iVe6xO`9yuXF+GHeR#o;Qo^KB{6Wi&_g^{hm51 zIy+u*kJOZ$*Z*i`TkDvA-c$v>ftJk!MFInn0d;mp2h%l36s0JGui{?J%K_ot{uaft zcxNAk3UJ_|B@)j$TsJisTFQle6F3{!pt0P!`T6jRrqEc4EiItB_EaATY0( zT_t&nF_W6bYkK{p6gMxP{6-c|3%~-mz7|Qqt^XAE!S$T8Y88(?E=7Jxr>I(z2KR_<7@_+g6}MJW}><5y}nMFDoVMOS0HLLhdB+}x?;jM zVT%tLw%vn%G{C@ixQa&;A>$tW?EK@Ipy1>^=mwzq+c4mkZpYs9h6Z?7L3k;sKR8-L z`tbUtPqZo5lqYbiby^#FAw!|G=6geI<%gfeTOX_uoZ>fs(qm|cKxd^$U;KZsv0h~& z_AVx5!V2&Qm@`xx&T|->D0s}kfC~8O?96dML!Udcq%Y9tyf8pq4kN;FqZ8RB$jFGj=xrsNhygm@G-kP ztfGoa+gEAkasX^M`>l(4^TCP6?hQpZoTjP)# z$#zuacm<+`K|M#hYw zWxJ&Wy0dm?@Zc=m+~0&tZ9k*)U6G*V#lS6R5cnKSW#J$VfpNUtXB5^ndTVl2aoXNCYMfLgf zKbG7>8$5sg3Qb&hLtueB;^F1xD~*}&a~Q7Sk1b%=HD>ava6tr}js(^jh<@s_ve^1_ z1J!7{qyK4QUPy8xravv(JxL{E#0E9*6&)XcBI&UzhYO1`*DMf{yl}|B@e^WN^be(7 zXk-)$_C$I+7fh~Oyd>k*z$MU`nU4aL8`m#}&YI(8W?Wq&= zB}6Mc5DA!{2_L*-m^{U1mIm{vg1a6E05nxd|-Kvt9UlBP!nRd`j;g?h*QZ2kNFzS zzcP(fd`WUQIowd&N#p0%pd7R1huoG&5*0N*b+s+@gO}*tL2V4wkCawL9vml=%s4#J zMyrYNUGp3w@k|kIT*#CE z7s?1iv43Q?8S@wE{mF8A;`lL2QTu}?M(!yj>|C`>wzHRTx{Cpclp#lFV*akk`SwZX zUG(@iu0y>NBzd%jQ#8kx9)f0nb)?HxxYB&dsFXS(!49;GpFHPH(G;I@^UQZ*sK{AJ zrXBB7T;22v<=o+LDeNC|NVe(00@`s9M)A@O!~Us1hu%+*$|wHSvOj-kQ>k9<-u zfJEtOM-@a+jIeNBiFWEQYPBV}A=yx{PR6F+YQXS2V768S1f(fJv6I|?qZ^3U;eK9< zTmETWh*;~SCQR94?fWCz#a6&SOVVn{C~YAVqZe`SYj@-8xX8XtHpuG9V@ z(jn$Uub+oPCKfs~m=An^(PTy7R~9DI(zI;>dV}h0un1{Td&>DQCj|D4FwPOI(S!td%$gfCUeS1q+J%Fw{QIG z7+nd?%@n^c3@0GI2r=knGZ9HXt)>)$XXa6Yr%3hTv|-wGGEjk?Vrx)Ms8y7)pdMxr z#FnS4RgPPB>TkF7K1F97txZGGTk$YOV3Lj$HwY)bari;g4~&gm=ck{Ui;XSh^WPp3 zOZ9iwd<`rd9MC0y|Jaa(n_pUgHiHIqmS`11h3Hb1FM|($4V3#SLgBk4u2q>4GkeXf ztXL0YIF-sy$0B$0JLsVQWp%Kb)^8TFO7=j(l5B>8^O5JmsME4s%!N&?k()s-Cl7T# zR~T-hePWXK%Wi5U$1kKulvP%8@LtEZ^3Sc!I9M_y+GaU_%6C8gqncR(Uk~6+E`xs? 
z77cK`V2yAQ&8E=W5Gj3IHo*e)n;;uHdcDLMekq@!;>B1`i@Jf^8!D$P=n@d{*eVgU z6;7FeTzRz-sZjPMw7ta+;P@)}dK~WJN2mSL_fzhhO>xGRnUg_)e+G|0x5)OVo^)lpA36@q9KIr<@@9nw|VcpqK@i&@WPMbzyKzTv@nw- z8qc)iOv2*}U<%s<&)qXYte8t*v9oa318us79*Z1aP`zZnVyu8$N(Xj9Zi& zICiUTwd3`yJ!Py7bn(%FBu^Mx9=O@#ez;`Hg|yy(olhzT0&|Z2V-`~}{tFgag3j6B zn=xhk%PYi5byqs)<-`d_X3To-4rz7-3bb*DKH)Ou?2}AMAG9K~OJhe4frXXICfxKw zDs$f_rIP<3PEe(9G1J&ACsJ461gFWRC24w=kTW!2p={_TyL?rPVUimt%cJt;c8>6v zmwLf}`$s3CQpALvwjbdOBDf;= zzASe=3Dn#sD|hM6f?R_)odiGb`orirF1oQJr3*2}h^jCZB{q&%4^Cv6wpN;g=2DHH zSn2o!$@q^P%dY)W3o3?T{hteGR~rKQ%>BoIfh!H#1z45BTuF|Tr+u4JMz6mDzdSG4 zpC=RD*0vuj+L6-CF6DeaQ+=8fpYO;uV2l92`>lLsK}&t?{>%|QS>7Wds~Iq%$J=FJ z#19d94Elruf*_3p9*-aMH+UM(oQA9_hNO>W98@IQaBF=#B+sg!NrIV$O<5eR9~o+Y z6V;TW3D~gyjidKlrdaEvfJn>!c9vib*R60h#(^amdMpL9`lwXt(TDsX7DX3%&YLB$ ziGxIWk}J1p#&~e91xq9MM+gM@UB@Dc>XyL*e;L{aIZ}4*iwTeK9}vOhRql2^_}ulR zyG{U8mriCJ!!8C?Y1J!R2HCb(C%tM7@@HPhzl>gO?ti3zt1dPP zoNWUx$6Dv%K+MhMJvdpGusy3@MBBgQzA|8a>UG5ARFg&~m&-%N(W63Itiav9HXA_t zf(L!cFOX&4N`eN}SGT@3_;HE)fS!;?#LG)X?Z|Bu{VZit+xs=w{$PpSbMZRLT8=cX z(#0)iD#<*ien}97D^{y5d}Y^v5t2Hl8K7nEwYI`3i~6a4df#^Sh~;Sd9#O5W-AZju zuJv~uZ3`B3AaUU0V;O!_YNK6?>88s$LCvcV_z|CsPlGWPK+WlOeh#S)ydXWXO6Qkl zF{BW0>D|;g0BsdCZb2Ht_jM7=W5WT3WSJkeH5G6m^6^m)K2z8-z86Y=G0{2vFb#?W ztd0kbwm(6=XdHgP46OUZT1mR9TI;0T=s4ezEyvd@%vCkG&wfk%>;j)x4o4dW4$}Z? z8io)PF$s=N*JurI7G#_vA7kJPb}vkwF%Q8F2yZ*J7U(pU5{MNlU6&Ua)Xpu9;EW6! zzp#WwR~<4+jcU)|d)M!O(@Xg#!R1>RC4meiq!L%wO`KP<<1IfG+r!TrG%7fJeM^IL zr>POIV{W)!mNzX&4>*WfG~YT~Qmy1)k;)PlHj(YqR7c%>^)lRFseEg;{A%yNk!f6~ z%ND!8ITh&}13l&Q`=hK`nu_XuA(FF$In6KBm29t!%jR20@o{5+^)gwp1gO<3=?HIC zlhx~?St8}umvNC5B!&=jY^P0}}rD1rw?dJfz*JIG#@zRBF6(3(S znkKa3CsCyy4R4fxkyUBaa}%j*acPw56ACU04p;e(Mzm^KQ}9NF4w+L$8C%I%NBvHK5{T zAb4H|ga|7?KveYU80*wemnMG>hd>#Fy&GJ}BI-AsaoM$hy~mRmsZ(2Gpe&#SK~ zst*ZYb2n7!#`fg@6*5>`^8>{Cus83ur0tJ4OeX2;N|Lf zNgxT^3=WwC9JpA?wo&A^q%6<$OW(`b2%FD!*Ec;gG<4RGSgav<3vlg`s8+Dl4+Wb( zU6uQPGRt8fBnyV&M@v|waOXSJ2N9W1{g+`xYer8pnGP)tT~W8W!#BXh<$cIT9o_KipP zxvcrrv*e3c_;oUM$beaZo^7y0Z!Zb<18nkl^sk{VI^8Z4_*JyyfI~y!9vTc`eQ3?N zx=#Ovuhv~U@M!+0yOhdeiG}yBCwG>eVB^-Yy{I z)R4&63hu<8JMf}3KD}nxX>&imUsBsBxoQj_NctyKrS~`1c_%AMg}iE}`ZRse#^K() zHyqz7qQe_LPDDK5EBUaI0;7T0QVLZc;JdCsHM({X8hfSIS-q%oFj)G7;K62pJ<6+h z2$WbYe#~rLn1h zH{2?Z6(s7TL`%_`eL`I$E!<0|iZu_&AZO7SO ztiWqC#7Gl#62(Z#TIZsx1)~dpRG*)tyzIFHM}OrO#r8elSLPR7nMKAkNa`-O8oGb< zWMWJ29u2R5@wzjd0VFVedNxk9t2W8k_spQfE|aC*k9LE|J;{89y(()J&i4f^ z%yr<&vlAs8>u_Ack~ee#;VyJB_52nW8wj1kHltp;240Iqt00osbXS9atyB$JF`iQ# z!bd8c2e;m*h+Emxc>fYId`?u(XA9!k#jwBP05@I+|GXxL5`9GOlKDWdl?JHAdka7D zsK+C@Y<`Zps)}(m%nqio3&{HgkJ8^>iUJswdEpJ0qU(dZqQ@u7>VD4B5P}?ESM1}> zYUn<_hratlVPuWD=O+_?Y=EbdZP>WD*`vjw*O}YCf-T1&Cej;1soHZ6y6`CA!A_W^ z&b6zOzVpxKwKc9#5yOy5q=C3@W??OlRMVM8gV)P5ApzLq(IoaH^yVS{bh@U}Ki`h? 
zLmjJ+ysjiH@4URKtK~=Hx#HB;w#Raz*FBa&fU!r*+i#wW-P>+|_x{zE9a*Nh>gN%+ zh4NcImCC7Za)f@$q|IhyPV=^^1qQ#@i%FtZbuvG@N@%QV9LQ1~)q zzvJ=N!w?8Rs_=lb2HzGFdL%l+k6uxCBQ3e8Uv3^Z5!f)KBUvEF*+spoidb{sH}P13q-I5Q!#gcU+TnS zc!eiErY{ET9u^vz3Cobw`mq9L(uUiAg3^uuC46Yx_DXR6ICCZcQsndWkZ^L{3$ zkGvsU?~HW(SDGhbX4Ss%nxevb2h-cKb!z?(I|bJH(umf7N>s75E@ua-Yj9$=@sBX@ zyoZXoD2AW>Au`uwli+_v%RmT!3vo-3Qx_=W4U1DDR)99n7c!2M6Rb1n>q_WVt4aTVDE&kD$nt?YnL{9(GBhD|7B>vb76I4fGTzn=Y86UT4&jEuh8QcgONas^ zN%W~3-DWQq4AFx@D1o$^wH!w*AJ%nxq9V?4bTUSswZFfu7=zf=5EpuoPuVa#6r?LV z1B@6<;sopPtVZ4xn=8d znu!d54dT(Gr<99%F6N9Cf4W-}#fmblZ-Sw9edP1@_{`;wl&P_3epi2R`Z-O^wcR0S zB*j8XbYLrHw2>=d8b*ODlkl_B$D#Rc234anO1{vssp1dY9I)bcHv8^I$|AI#Ejq%i zrhHXZxEljML4UuaK2fO8s4I49L>#V#bb=f4A%$oxT zl%RdO0_jG`h&a1yVCi6Mlpo>Nk|o%6YRb3<6D>`2P4OfBQ!~Z6Fbr!rI&7t5F;|0s zhJA<<@tP||9X~jEv&(g*gku4@20Qxv+Nj^HIB-E_hTJhy_^i^2J%G>-IXw~2%i}!7 z&lZY z+A3TS$&6uQQnvFteGdNu3kcTD-ICvIQ+^!>OgfgD&OkT##`YqL-4`!NwgD>A4 z`cQ_pkA#{+1X1Lw(oJq8L;()hrk#k0U#QMij`COpD|s0T(WE48Ag#Tr?n9dwM%AcS zf*ze1>~l2O4`w-FXh){NIhG2O9FKlBLAjHjs_&-&f=L_aebHHre8Sn%25CUS74h;X zA0{upb&2+50>_qcAso86M9XV`#&|lLP{109Bvwzwi%HPiM8`B}p2D#CnHUqGbq(On zZJQu`RDz62G2(Y;dGL)#4&};$WnA_*#6#7no##;I))1Cd#|5XOxdofjqP5nZcq3UJvr4mV7g4<*x@!JqWJoZsuPqZ&C(#PR<{ey^qE{Nb9$uu*w zmPGSyCsXm*a|3=7YPqyoWiF4mYPlw zI2NgmTr&r=J*kS$toY`JX2Imf&-f+lu#k5HOp>7~!m80kc3Y@_pi{qpw?ux4%_cuv z?L}K}W14_S^-I1eHeTBU3_d(da^kfZu%sN~K}A^*W?EDDNN{tWSM-}%O6ya)C(>Z5 z|AHXwfgK%g6j!2-!GsSlJ<|!UkLaEK?~iSvmFVNV8NM6pH8ZAy|FaHD5$N#Y63>;KSdj4TPe zUg(Z)N61Lsx<=2S$t@E2DR;KTH;9|yj7OWbUxe?4;Jl3=*d)V?`I)U;j~L!8iwwO5 z@<<%&m2~{0v-76ZC!4BBRVsZ`MjXGB+golm7^gTBqM8(csu%E2KALTH&EeUtCfOQG zJ&zN3+s*LTaL(#>Mh@~-pEwru>Wijs>3l9*mB2){WLoG(G&;$_H?g{Sh=bEG`V?v< z=czxJVftLQth%*k@fkj1pTP7P17Vq*f3yiSJ)&WYg{m!HlVLO-t*NR_Cwy+d=Fi_6 zlOP<8H^NJQzh!XWrrRV@os%t<80u5Xsu=f*ISR>0MMqW^!SMl~$-!*Y`6;R~dDM@4 zW@wA%wG@PZO34y!*_^Grgdn95AAXAO$07PMhk{wDOhzg?G1 zJbw6?iD6mkO*@$I{Ps{4v5azd)TjmZY`LyGbu9Qz)(lO{cQvcnq+~-JnMbF$0e+P-#vecFLPq54^N+)LJ2K^kcUjFu59ggOnPM_C%n6 z*0P4w#t&{=p+#wD9kQCK?@iI#&P(%4t+KsUjC+{9Y$R?O+isE)d+Z%k)ai~m=zFPu zbPE%d$`jJfY>wgD5+HVxn*(;3W07JJy8K_6KljHGNAR!5#B~|Xe94y6>*`H9`>xqO zshl+Q7FDO}OsazE5PxNuF%PRu%S|TLoXEi%@lvqMgv<#vr0dswAr$F=Xytb<*E!p0 z*qFRs64AMgMYL1lc)P^;0z)J}V{wLmeKpd*kny|}&cWojpj`CIG8&9#DT$?GU#>%9 zN^U-QzW`QGNo>GK|5j z+gRTi=vj#eDy+pHk@hz%`Q}Toujx2(<-5XYxId&so(;z!gO*1XhDbPUtgSsCDWSmFiyP% z7fvIDwy2(Z`G6c1mGee_b3ARs>sH7e6#MRVLTygR}WIWqJtG6Ed@!d)sP4L*PBa`OKPzf$O?h0;=|ZRNa6P>xZ{fbD`&z zCSUi8P+Whpy4h=%Z|)Kh;4Am&I*zrA{{qI>`dRTYs2El2<`;BT{+el3R~TsQ0r6LP zVi8>xCCs4yPmHbA?zz_NsbFN74-90wgSiT8&!-VsU?X@&`4KXvgDa%b@mkmF6@C6& z)Zdia*RZC;wf%N~!%5sURF_z>gJ3u_ZKU2@OlL>FOB$@qdT9RS{0bVpdX+l1&c+U* zX!-DF&D`n5Zb+WYK<(9616TbPK)xu_2a4B>1-BoWRV1>pg!g{6#^f!9PW1nQVmE&+ zPu82ME0UZ5jqPFWqi4WX5)!ef!8|_Kfv4$c9Ptd@m24k>HQtaeeZm(mnfJ6s!`T%h zZama+6Z~;s?G4i`A>FhXR8YWE=h#NHZtnIH5Y%Q8V3a6zCYK{fE^cw?Dbl3rFK`b| zW^HQj`}0O8;~fxY`n`uAY|iKL;pImHrdCNe*=ew4#nZ*8HX7W3pclNXR? 
zkL07-m)EL)Ko}SoadEy|&EeLs?9Ok-nn~e$cw;KU^j~Xb8F|VBM3!Eu;KO5F8H;|E zJbH_0vJsdF;1tW%Cw&&BuCM{{>Tk(6eaBEJJZ76pajBD1XuZ1qXjn;imt-4-a&?E1 zWLXWYkKFy3^8lNq%DXXX=Dc-+UQX{tV&eGO;=;&(vu)j!^y|2w6NVW=y z?zaETxl9HBFD>-mumX{dj4cjgg7l}XFh(CbXer}w%`h?i?F+q9v% zZ)mkT8f_=1f#QQl)o_fs2*#izWCNTeJbYZ5Y#D@xyU*^6nx2aW6N|+;A7NDPjH6vg zbWYDORu-@8LG-QO7sAn^g2i4Ya{)o=S49MWiqM=7eOdbs_XC{57ftNKK25sZz>F#A zn$<(PaQ zjtulqJp+>cXS5LTqop<-i{3)(f?e#~7Th)&e{g;V%1%=mGwK~6>*3_si(_39m`xCW zVR{i4^=`tQeuNZ4qCH%d$~Lh+W@TjMUBTW*k88vZYX#{Fz#L4R7K}b^3$M%6yP@cja3eB7N@AFss15h0@HT%*ap_bT*2iCNIs1G7{6z$2QiDr*O1E?+ud>9vpz&(!9lgOqJ zoc0%jsL_R|llMmD);JDkw3}kFgXylqY{cVplQ`kd{vU*%uzQBqy z3L|?&*)P~Ih4_-1?*P5Q6yXhldmo;4A2g>ZG;*~{UDO=-u=V z>8xP@Wh^vI(Cit{K}sc#Ip`#RA4BW%TEzPb=V8)YXMF)_%iUM+w7(#le-YsGDRRMW zMQj+(nA$9eyXk^$f9ea<9_c6`00Rq1y=kNB{$f?7d1HL z_C~F{fOOE?8Gg)4)3B*yjf=@pFY1ehvI7R@0nAo`_GmUKv1cTA9p{sO&S%tKCZM|; z=TxGv&r!10Ma>8OzGa*j*_QQ-{1bS}!_x1{Jg)`#Yep>gKHtZ#DGyx1pQK1o{nXpZ zS3A~T(fAbxpP`+O^b)W@uj9aU{ z_&`8m9u~J?iJN?ZEPbm6r&FTX?7lT{P(whr!>nf;R)rd4) zIgV>BZ6j*%ng6jmvUUCHHX&c<_Mo%u*2+;K{Vx^%%w4ew>j}=NCpRzIu{m8K0i?i9W?I)$ zFNHeB=p}P*Utmf?*wNlsoPN_&gMVs*&e1a>a5XNpJu>(@iWS_ z>!z5{`46Ar;Hg@L@VW?BN)HHfUJ#$c`0y9>LXLPUT=C-jVIBx^NYMi$tYNOW+M8 zbkX_}5-@5adoWc%8AUgAgo4tSfYw za}e`wKMQtqVgTI~6E;~I&PeFO>N6!XoPUF49TPZz7z@@p2xp8tZFMYwGIAZ1^UvDN zxZ%5fgAc7)xir>_pD{3tfbic~QQFzP5|hF-BEPuh>9v>zf|!5Vk)8v-`4{bCP22EVQj&r*cNR0}3bOAQ>gorJA|57zn+cdu1@;-ZfY@0-z5u2Q0LPl634 zxl+h~w5N%DP3erYCk1L#*kYPtg?;!dE81qha{!!`kfI9_}K_9;H?P; z0xpiBT}e;ERU0rx4Mw9MJtm0tK{=g`HA)A6;WhVS>Dgk225xT@QWb%H&zY$eC6oO;^go9VB6`2lc0M25ZApEgL2jz>yh&$m;(t_ys z58T`Ah~>)7WNV&=L*UM;$1*+4$kFYRNh9jH0<;qXS=WV^`M@4yoL{n|`fDF7x-;^B z_5AUl!0m=+agGwT8q5TSbAntNFComrrbFucQF>w)>IOko+?w4*& zIMQr8GJSfGSu8G}ab8gv8fXEt*C0oKwMZ*DvV{gK2=Hw@;1rfSrF=aW;rXygQ7G&d zk?E#3;Bxfja9=~Qy-)nr&02`>)5Ms!u4b5UO*lo7%W6x9jnMkOmdEW%h%nlCRjBoE z;ZG*X+1axct$`$*TBzMGvMby^4AC_ba6JPIA?h^<-eKk=>6lfeEFIgBy~6(%Oz~EWecK?cX)~=0f8R|IQgz6;6)(Vf~XQDza8T zRY;HV+5ghqDy+;~zn_+q{xJ0;d^yIhA;qv$Ms+@U9bB@7ud*CNVO zD#guj>E(5jsmrAhijuTRHBMt(+VfTz10K6bvhG3ebq)|U{$iNav*H+$t}H#W_TAmMlgByd!0ywr|a_#rP>k5T~-G+95HBw^;e)Lv$bJv+S2 z$a8zSek#QxG^M*>zZQ%nMDGNwP2Sp>lg0AH@}z(>#3nX}jDpt?H(KRj}vmj5x@}uyLO014g#t zL~R$)fpgs}Q|jM4ms?J|XMRERi}fn06&*%>mkIo?eDNC5qrKv`7?fLtvi%?j^_{yp zh{p~KrTCCnmU5BXl1&e)@e~GL;M4cDP{$Hn2nrgkr31GnpGXz|R4oCnd0py&+6+>r z6oUCn6Ik?rZ;JH|G|tnS->KEO197`q1W{>QE$kxgA&!zHqAob$p(F7Z&PipJyQ4w9 zm|szd#G_jD@U~|pjUy(19)Ad#gI3syX;?zrJzmY)a$2bI?+ zTVk$qP>_7WuCLVpF@6ULqpaGZC9o#4^;Uf8<_npB$AkbYuS$63rRqpfPlOI=<1ico z@KErPUu210Qexk)nIZ1snA?Zd9u_ZCv)-0wexsfM&3%dea9{YV)9UvLGlCrMQ!i4y zW~c0|Sw<dV1qzl<+ zNc+iuZ0${3*52fxsD2KuWDiAq_xq_Y0Nt|$DdjkJWP@pn1bzT)VgXhfc`bA?@o!rv z1CD1`QuSWzO?}m~RZ!qaUFhqco2R)v0 zR*(d*u;BsNyFSCKi=n7;7;x3Hs02_KOEUrTCTbN z&PHO}w=~w}!A`q_WNS7weWNZ(eGV99iS^&5o)LV0y$6z$nv%x%1qe{fp7iN@#jexS zaH#ind3{9|(uLxhw##q}*0YGd&N||MlbbWIz)71{ED-Lq#yTIujKpO!=!f zbom>cR;bQcE_0oep~uiG1g(u#2xw;h01`>REfv zxm68p$6N^Wj&3_78)8U^#r^jKb2Bbf?<+?$1aGi7@;>YuVqF*9xuu@d9k+#lkh1~u zO2?K=Xp?*vrUWn2E^u73$-;tL!ACmzK=~M`x9?V)nsFyvd}9)XyG6-2GMEfT-bcKquN-LmE1KhB`GP_*=xHP!y?&|uwturN<2GB%w zr)|>pU!yn0PVc}k{EW?zF3Hk=o}w4b-QIz`9K$UvoibU$p81>4oJtXhCQ@*MZkTtu zLs<%cZq#zk4Hhw&O>~>Rh4r>X; zF=q}Md&X6Lc4r%je+spA!k<7e)*GwBdX8Y>{{`XdwA+twsSh!B*85R^me?FKF2R@? 
zSLyGOt^F1ZU)L6vZUnztySdjkGxtz7-vv9jS*D?b+GXAI=)2a0a0Y#Ig6WreR0;oO z@(`c*{otTX8hk4F;!&u#29u62e5ZLQf*>Sb@xXmX6HC;MMpTzoGPK^f32Vcd$S|pp|lbiT@yT>hc~&_8=j* z*{|%ifXR-$x3aT)LL)==>C%LrVGfmGqS}wBUmrs4z}iBQhFtBjH^9FGhHAN(MiFRV z%wF)HACeRKQ90y!F@o9dt&_0{Y185M+1w4%2~s<+W&DV0QSrSs3Wp(Q@&M=*`gC|5 zR`)BVkx#Jtaa$Q^$!3de?xRdzutte!_GS?3vd3L9Lrpj2>fLy@FMlhKX!_KN)_*Us zzEj`lgN?J^@y)bt_Qv<^*iOjNzpuU1@p-ipy77tF!=QPo9839F`xpyr7L9zWAVCycmz@2jTe*8N?6jSSAal?Ui95L?Itn;!m| z+h~PlTL!D92CpH29@VnZm7Bx}izPEu#t7^()%yipE-hq)>U<})gcgnQrTLixMIbwr zL&XJ#w9aAY{C%gDu;u|r2H!4Z20{@IA&U3}eHv9wfNLcew3ZnvQd>OKoAr$Tf;qu~ zJ;-c4%WWbgroK}jS(q~$jkJ#!dn=*KD@&>*&8mf|JRgVzRvzw>KD-Rje@*+g@M4qP9!M6Q8Vb{^I@ zrZ)O?{M03#N}V6~X;bKX0AXL>iJQRFp0KmDN20sv&v6c1KaGCE1=8MA8ap%hP$FBx zh4~tAs00vUyCP?3(~bEC(8L|}gH0vD&&`P#9}i1s6=Tv|=gkW+(GQM-62HE(8OUfq zF4(jRS`|?E%Id6X*S%#{dY11!u(TdEq*aMkm1$^S<a7F{n*IT4k|ZkxpQ(5>AdVF>%IF@2 z%i4$whatever'. This content is opaque to +end user applications and can change without notice in future versions of this +package. These names and content are technically visible to end-user +applications, but this is only to enable the creation and use of polymorphic +functions via the \verb'_Generic' keyword in ANSI C11. + + \begin{spec} {\bf SPEC:} The following macros are extensions to the spec. \end{spec} @@ -1399,7 +1429,7 @@ \subsubsection{{\sf GrB\_UnaryOp\_new:} create a user-defined unary operator} \begin{mdframed}[userdefinedwidth=6in] {\footnotesize \begin{verbatim} -GrB_Info GB_UnaryOp_new // create a new user-defined unary operator +GrB_Info GrB_UnaryOp_new // create a new user-defined unary operator ( GrB_UnaryOp *unaryop, // handle for the new unary operator void *function, // pointer to the unary function @@ -1857,9 +1887,10 @@ \subsection{GraphBLAS select operators: {\sf GxB\_SelectOp}} %================== const void *k // user-defined auxiliary data ) ; \end{verbatim}} -There are five built-in select operators listed in the table below. Each can -be used on any type including user-defined types. User-defined select -operators can also be created. +There are five built-in select operators listed in the table below. For the +first four operators, \verb'k' is a pointer to a single scalar of type +\verb'int64_t'. Each operator can be used on any type, including user-defined +types. User-defined select operators can also be created. \vspace{0.2in} {\footnotesize @@ -1878,11 +1909,10 @@ \subsection{GraphBLAS select operators: {\sf GxB\_SelectOp}} %================== } \vspace{0.2in} -\begin{specbeta} +\begin{spec} {\bf SPEC:} \verb'GxB_SelectOp' and all built-in functions in the table above -are extensions to the spec. They were introduced in SuiteSparse:GraphBLAS -Version 1.1.0, Dec 1, 2017, and should be considered in Beta status. -\end{specbeta} +are extensions to the spec. +\end{spec} The built-in \verb'GxB_NONZERO' select operator is unique in that it is a function of the value of the entry $a_{ij}$, but it is still type-generic. 
It @@ -1912,7 +1942,7 @@ \subsection{GraphBLAS select operators: {\sf GxB\_SelectOp}} %================== \vspace{0.1in} %------------------------------------------------------------------------------- -\subsubsection{{\sf GrB\_SelectOp\_new:} create a user-defined select operator} +\subsubsection{{\sf GxB\_SelectOp\_new:} create a user-defined select operator} %------------------------------------------------------------------------------- \label{selectop_new} @@ -3557,7 +3587,7 @@ \section{GraphBLAS Operations} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \verb'GrB_apply' & apply unary operator & ${\bf C \langle M \rangle = C \odot} f{\bf (A)}$ \\ & & ${\bf w \langle m \rangle = w \odot} f{\bf (u)}$ \\ \hline -\verb'GrB_select' & apply select operator & ${\bf C \langle M \rangle = C \odot} f{\bf (A,k)}$ \\ +\verb'GxB_select' & apply select operator & ${\bf C \langle M \rangle = C \odot} f{\bf (A,k)}$ \\ & & ${\bf w \langle m \rangle = w \odot} f{\bf (u,k)}$ \\ \hline \verb'GrB_reduce' & reduce to vector & ${\bf w \langle m \rangle = w \odot} [{\oplus}_j {\bf A}(:,j)]$ \\ @@ -3574,11 +3604,10 @@ \section{GraphBLAS Operations} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \newpage %=============================================================================== -\subsection{The GraphBLAS specification written in {MATLAB}} %================== +\subsection{The GraphBLAS specification in {MATLAB}} %========================== %=============================================================================== \label{spec} -Each operation is described in the following sections. In addition, SuiteSparse:GraphBLAS includes a MATLAB implementation of nearly the entire GraphBLAS specification, including all built-in types and operators. The typecasting rules and integer operator rules from GraphBLAS are implemented in @@ -3670,7 +3699,7 @@ \subsection{The GraphBLAS specification written in {MATLAB}} %================== Additional files are included for creating test problems and providing inputs to the above files, or supporting functions: -\vspace{0.2in} +\vspace{0.1in} {\footnotesize \begin{tabular}{ll} MATLAB \verb'GB_spec' function & purpose \\ @@ -3688,7 +3717,6 @@ \subsection{The GraphBLAS specification written in {MATLAB}} %================== \hline \end{tabular} } -\vspace{0.2in} \newpage An intensive test suite has been written that generates test graphs in MATLAB, @@ -4532,7 +4560,7 @@ \subsubsection{{\sf GxB\_Matrix\_subassign:} assign to a submatrix } exactly as described in Section~\ref{accummask}, but operating on the submatrix ${\bf S}$, not ${\bf C}$, using the optional \verb'Mask' and \verb'accum' operator. The matrix ${\bf T}$ is simply ${\bf T}={\bf A}$, or ${\bf T}={\bf -A}^T$ if ${\bf A}$ is transposed via the \verb'desc' descriptor, +A}'$ if ${\bf A}$ is transposed via the \verb'desc' descriptor, \verb'GrB_INP0'. The \verb'GrB_REPLACE' option in the descriptor clears ${\bf S}$ after computing ${\bf Z = T}$ or ${\bf Z = C \odot T}$, not all of ${\bf C}$ since this operation can only modify the specified submatrix of ${\bf C}$. @@ -5004,7 +5032,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %============= present but \verb'GrB_SCMP' is true). After (optionally) complementing the mask, the value of \verb'M(i,j)' can be 0 for some entry outside the \verb'C(I,J)' submatrix. If the \verb'GrB_REPLACE' descriptor is also - true, the \verb'GrB_assign' deletes this entry. + true, then \verb'GrB_assign' deletes this entry. 
\item They differ in how duplicate indices are treated in \verb'I' and \verb'J'. @@ -5030,7 +5058,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %============= \verb'GxB_subassign' is much faster than \verb'GrB_assign', when the latter must examine the entire matrix \verb'C' to delete entries (when -\verb'GrB__REPLACE' is true), and if it must deal with a much larger +\verb'GrB_REPLACE' is true), and if it must deal with a much larger \verb'Mask' matrix. However, both methods have specific uses. Consider using \verb'C(I,J)+=F' for many submatrices \verb'F' (for example, @@ -5102,7 +5130,7 @@ \subsection{Comparing {\sf GrB\_assign} and {\sf GxB\_subassign}} %============= \item For \verb'GxB_Matrix_subassign' (Step 2), the mask is applied to just ${\bf - S}$. The mask has the same size as ${\bf C(I,J)}$, ${\bf S}$ and ${\bf A}$. + S}$. The mask has the same size as ${\bf C(I,J)}$, ${\bf S}$, and ${\bf A}$. Just prior to making the assignment via the mask, the \verb'GrB_REPLACE' option can be used to clear ${\bf S}$ first. No entries in ${\bf C}$ that are outside the ${\bf C(I,J)}$ can be modified by this operation. Thus, @@ -5404,18 +5432,11 @@ \subsubsection{Performance of {\sf GxB\_subassign}, {\sf GrB\_assign} time complexity can become quadratic in the worst case. \item However, any single assignment takes no more than $O (a + s \log n + n + -k + s \log s )$ time, even including the time for a matrix completion. This -time is essentially linear in the size of the matrix \verb'C', if \verb'A' is -relatively small and sparse compared with \verb'C'. In this case, $n+k$ are -the two dominant terms. This is the case for the example given in -Section~\ref{subassign_matrix}, where SuiteSparse:GraphBLAS takes only 0.74 -seconds to compute \verb'C(I,J)=A' just once, where MATLAB takes 87 seconds for -the same computation. The time for SuiteSparse:GraphBLAS includes the final -completion, and it returns the matrix back to MATLAB as a valid MATLAB sparse -matrix with all pending computations completed. In this example, \verb'C' is -much bigger than \verb'A'. As a comparison, MATLAB takes just 0.42 seconds to -compute \verb"C+C'" for this matrix, which also takes time linear in the size -of the matrix, $O(n+k)$. +k + s \log s )$ time, even including the time for a matrix completion, where +\verb'C' is $n$-by-$n$ with $k$ entries and \verb'A' is $a$-by-$a$ with $s$ +entries. This time is essentially linear in the size of the matrix \verb'C', +if \verb'A' is relatively small and sparse compared with \verb'C'. In this +case, $n+k$ are the two dominant terms. \item In general, \verb'GxB_subassign' is faster than \verb'GrB_assign'. If \verb'GrB_REPLACE' is used with \verb'GrB_assign', the entire matrix @@ -5439,7 +5460,11 @@ \subsubsection{Performance of {\sf GxB\_subassign}, {\sf GrB\_assign} i7, 16 GB Ram, OSX 10.11.6, clang 8.0.0, MATLAB R2017A.} The same computation takes 0.74 seconds in SuiteSparse:GraphBLAS, a speedup of over 100. This is after finishing all pending computations in GraphBLAS and returning result to -MATLAB as a valid MATLAB sparse matrix. +MATLAB as a valid MATLAB sparse matrix. The dominant time complexity for +GraphBLAS is $O(n+k)$, where $n$ is the dimension of \verb'C' and $k$ is its +number of nonzeros. As a comparison, MATLAB takes just 0.42 seconds to compute +\verb"C+C'" for this matrix, which also takes time linear in the size of the +matrix data structure, $O(n+k)$. 
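To make the \verb'C(I,J)+=F' usage discussed in the hunks above concrete, here is a
minimal sketch of a single accumulated submatrix assignment with
\verb'GxB_Matrix_subassign'. This is an editorial illustration, not part of the
patch: it assumes \verb'C' and \verb'F' are \verb'GrB_FP64' matrices created
elsewhere (with \verb'F' being 3-by-3), and the index arrays \verb'I' and
\verb'J' are hypothetical.

{\footnotesize
\begin{verbatim}
    // sketch only (not part of this patch): C(I,J) += F with no mask and the
    // default descriptor, using the PLUS accumulator.  C and F are assumed to
    // be GrB_FP64 matrices created elsewhere; F is 3-by-3, and the index
    // lists I and J below are hypothetical.
    GrB_Index I [3] = { 2, 5, 9 } ;     // row indices of the submatrix
    GrB_Index J [3] = { 0, 4, 7 } ;     // column indices of the submatrix
    GxB_Matrix_subassign (C, NULL, GrB_PLUS_FP64, F, I, 3, J, 3, NULL) ;
\end{verbatim}}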
\newpage %=============================================================================== @@ -5474,7 +5499,7 @@ \subsubsection{{\sf GrB\_Vector\_apply:} apply a unary operator to a vector} \end{verbatim} } \end{mdframed} \verb'GrB_Vector_apply' applies a unary operator to the entries of a vector, -analogous to \verb't = op (u)' in MATLAB except the operator \verb'op' is only +analogous to \verb't = op(u)' in MATLAB except the operator \verb'op' is only applied to entries in the pattern of \verb'u'. Implicit values outside the pattern of \verb'u' are not affected. The entries in \verb'u' are typecasted into the \verb'xtype' of the unary operator. The vector \verb't' has the same @@ -5504,7 +5529,7 @@ \subsubsection{{\sf GrB\_Matrix\_apply:} apply a unary operator to a matrix} \verb'GrB_Matrix_apply' applies a unary operator to the entries of a matrix, analogous to -\verb'T = op (A)' in MATLAB except the operator \verb'op' is only applied to +\verb'T = op(A)' in MATLAB except the operator \verb'op' is only applied to entries in the pattern of \verb'A'. Implicit values outside the pattern of \verb'A' are not affected. The input matrix \verb'A' may be transposed first. The entries in \verb'A' are typecasted into the \verb'xtype' of the unary @@ -5548,12 +5573,10 @@ \subsection{{\sf GxB\_select:} apply a select operator} %======================= describing each variation. When discussing features that apply to both versions, the simple name \verb'GxB_select' is used. -\begin{specbeta} +\begin{spec} {\bf SPEC:} The \verb'GxB_select' operation and \verb'GxB_SelectOp' operator -are extensions to the spec. They were added to SuiteSparse:GraphBLAS 1.1.0 -on Dec 1, 2017, and should be considred as Beta. Their design may change as -feedback is received on this feature. -\end{specbeta} +are extensions to the spec. +\end{spec} % \newpage %------------------------------------------------------------------------------- @@ -6490,7 +6513,7 @@ \subsection{Creating a finite-element matrix} } } \end{verbatim}} -Since there is no \verb'Mask', and since \verb'GrB_REPLACE' not used, the call +Since there is no \verb'Mask', and since \verb'GrB_REPLACE' is not used, the call to \verb'GrB_assign' in the example above is identical to \verb'GxB_subassign'. Either one can be used, and their performance would be identical. @@ -6511,7 +6534,7 @@ \subsection{Reading a matrix from a file} Section~\ref{random}. The function can return the matrix as-is, which may be rectangular or -unsymmetric. If an input parameter is set to make the matrix symmetric, the +unsymmetric. If an input parameter is set to make the matrix symmetric, \verb'read_matrix' computes \verb"A=(A+A')/2" if \verb'A' is square (turning all directed edges into undirected ones. If \verb'A' is rectangular, it creates a bipartite graph, which is the same as the augmented matrix, @@ -6625,7 +6648,7 @@ \subsection{Triangle counting} When a mask is present and not complemented, \verb'GrB_INP0' is \verb'GrB_TRAN', and \verb'GrB_INP1' is \verb'GxB_DEFAULT', the SuiteSparse:GraphBLAS implementation of \verb'GrB_mxm' always uses a dot-product -formulation. Thus, the ${\bf C \langle L \rangle} = {\bf U}^T{\bf L}$ method +formulation. Thus, the ${\bf C \langle L \rangle} = {\bf U}'{\bf L}$ method uses dot products. This provides a mechanism for the end-user to select a masked dot product matrix multiplication method in SuiteSparse:GraphBLAS, which is occassionally faster than the outer product method. 
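The masked dot-product rule described above can be requested from user code by
transposing the first input via the descriptor. The following is a minimal
sketch, not part of the patch: it assumes \verb'C', \verb'L', and \verb'U' are
\verb'GrB_UINT32' matrices already created, with \verb'L' and \verb'U' holding
the lower and upper triangular parts of the graph's adjacency matrix.

{\footnotesize
\begin{verbatim}
    // sketch only (not part of this patch): compute C<L> = U'*L with the
    // dot-product method, by setting GrB_INP0 to GrB_TRAN and passing the
    // mask L.  C, L, and U are assumed to be GrB_UINT32 matrices created
    // elsewhere.
    GrB_Descriptor desc ;
    GrB_Descriptor_new (&desc) ;
    GrB_Descriptor_set (desc, GrB_INP0, GrB_TRAN) ;    // first input is U'
    GrB_mxm (C, L, NULL, GxB_PLUS_TIMES_UINT32, U, L, desc) ;
    GrB_free (&desc) ;
\end{verbatim}}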
@@ -6649,7 +6672,7 @@ \subsection{Triangle counting}
 Sandia \cite{WolfDeveciBerryHammondRajamanickam17}
 & \verb"sum(sum((U*U).*U))"
 & ${\bf C \langle U \rangle} = {\bf UU}$ (outer product) \\
 SandiaDot & \verb"sum(sum((U'*L).*L))"
- & ${\bf C \langle L \rangle} = {\bf U}^T{\bf L}$ (dot product) \\
+ & ${\bf C \langle L \rangle} = {\bf U}'{\bf L}$ (dot product) \\
 SandiaL & \verb"sum(sum((L*L).*L))"
 & ${\bf C \langle L \rangle} = {\bf LL}$ (outer product) \\
 \hline
@@ -6691,7 +6714,8 @@ \subsection{Triangle counting}
 Ghz Intel Core i7, 16 GB RAM, OSX 10.11.6, MATLAB 2017a, with the clang 8.0.0
 compiler. Only a single core was used for these results. In addition, the
 matrix \verb'L=tril(A)' and/or \verb'U=triu(A)' are used as-is without any
-reordering. MATLAB failed on three matrices because \verb'U*U' is too large.
+reordering. The run times include the time to construct \verb'L' or \verb'U'.
+MATLAB failed on one matrix because \verb'U*U' is too large.
 For the first set of matrices, the outer product formulation (${\bf C \langle
 U \rangle} = {\bf UU}$) is always faster than the dot product formulation, but
 this is not the case for the second set.
@@ -6704,7 +6728,7 @@ \subsection{Triangle counting}
 using the Intel \verb'icc' 17.1 compiler. Unlike the other three methods,
 \verb'L' is sorted by decreasing row degree, which improves the performance.
 The Kokkos time includes the time taken to do the sort. The run time listed is
-the best time obtained from several runs with 1 to 64 threads.
+the best time obtained from several runs with 1 to 32 threads.
 
 Comparing GraphBLAS and Kokkos is difficult since these results were obtained
 on different machines. Also, the results in
@@ -6714,9 +6738,9 @@ \subsection{Triangle counting}
 \cite{WolfDeveciBerryHammondRajamanickam17} state that reordering \verb'L'
 improves the run time. However, with these many caveats, the last column lists
 the speedup of Kokkos over the GraphBLAS outer-product formulation. Since the
-Kokkos method is parallel (with up to 64 threads on 32 cores) these preliminary
+Kokkos method is parallel, these preliminary
 comparisons indicate that the sequential performance of GraphBLAS is
-competitive. Using up to 64 threads, Kokkos is about 3 to 15 faster than
+competitive. Using up to 32 threads, Kokkos is about 3 to 18 times faster than
 SuiteSparse:GraphBLAS, which is currently sequential (median speedup of about
 9). Further comparisons are required, however. A parallel implementation of
 the matrix-matrix multiply in \verb'GrB_mxm' is also in progress.
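For reference, the SandiaL variant listed in the table above,
${\bf C \langle L \rangle} = {\bf LL}$, can be written with the same calls the
patch uses for the other two variants. This is an editorial sketch, not part of
the patch; \verb'A' is assumed to be a square, symmetric \verb'GrB_UINT32'
adjacency matrix with no self-edges, and error checking is omitted.

{\footnotesize
\begin{verbatim}
    // sketch only (not part of this patch): SandiaL triangle count,
    // C<L> = L*L with L = tril (A,-1), then ntri = sum (C).
    int64_t ntri ;
    int64_t minusone = -1 ;     // k argument for GxB_TRIL (an int64_t scalar)
    GrB_Index n ;
    GrB_Matrix C, L ;
    GrB_Matrix_nrows (&n, A) ;
    GrB_Matrix_new (&L, GrB_UINT32, n, n) ;
    GxB_select (L, NULL, NULL, GxB_TRIL, A, &minusone, NULL) ;  // L = tril (A,-1)
    GrB_Matrix_new (&C, GrB_UINT32, n, n) ;
    GrB_mxm (C, L, NULL, GxB_PLUS_TIMES_UINT32, L, L, NULL) ;   // C<L> = L*L
    GrB_reduce (&ntri, NULL, GxB_PLUS_INT64_MONOID, C, NULL) ;  // ntri = sum (C)
    GrB_free (&L) ;
    GrB_free (&C) ;
\end{verbatim}}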
@@ -6775,120 +6799,120 @@ \subsection{Triangle counting} \hline matrix % & $n$ & \# entries & \# triangles & \multicolumn{2}{|c|}{MATLAB} % (U*U).*U -& \multicolumn{2}{|c|}{${ \bf C \langle U \rangle} = {\bf L}^T{\bf U}$} +& \multicolumn{2}{|c|}{${ \bf C \langle U \rangle} = {\bf L}'{\bf U}$} & \multicolumn{2}{|c||}{${\bf C \langle U \rangle} = {\bf UU}$} & \multicolumn{3}{|c|}{Kokkos} \\ & time & rate & time & rate & time & rate & time & rate & speedup \\ \hline %------------------------------------ SNAP/cit-HepPh & % 34,546 & 420,877 & 1,276,868 & - 0.357 & 1.18 & % MATLAB - 0.165 & 2.56 & % dot C=L'*U - 0.042 & 10.14 & % outer C=U*U - 0.0044 & 95.4 & 9.4 \\ % Kokkos + 0.363 & 1.16 & % MATLAB + 0.180 & 2.47 & % dot C=L'*U + 0.049 & 9.59 & % outer C=U*U + 0.0044 & 79.9 & 8.3 \\ % Kokkos %------------------------------------ SNAP/cit-HepTh & % 27,770 & 352,285 & 1,478,735 & 0.415 & 0.85 & % MATLAB - 0.165 & 2.13 & % dot - 0.041 & 8.49 & % outer - 0.0050 & 70.2 & 8.3 \\ % Kokkos + 0.171 & 2.05 & % dot + 0.046 & 8.31 & % outer + 0.0050 & 72.5 & 8.7 \\ % Kokkos %------------------------------------ SNAP/email-EuAll & % 265,214 & 364,481 & 267,313 & - 0.180 & 2.03 & % MATLAB - 0.121 & 3.00 & % dot - 0.032 & 11.45 & % outer - 0.0058 & 62.8 & 5.4 \\ % Kokkos + 1.264 & 0.29 & % MATLAB + 0.133 & 2.73 & % dot + 0.035 & 10.33 & % outer + 0.0058 & 70.7 & 6.8 \\ % Kokkos %------------------------------------ SNAP/soc-Epinions1 & % 75,888 & 405,740 & 1,624,481 & - 0.535 & 0.76 & % MATLAB - 0.360 & 1.13 & % dot - 0.059 & 6.87 & % outer - 0.0039 & 104.0 & 15.1 \\ % Kokkos + 0.778 & 0.52 & % MATLAB + 0.376 & 1.08 & % dot + 0.067 & 6.01 & % outer + 0.0039 & 108.0 & 18.0 \\ % Kokkos %------------------------------------ SNAP/soc-Slashdot0811 & % 77,360 & 469,180 & 551,724 & - 1.022 & 0.46 & % MATLAB - 0.281 & 1.67 & % dot - 0.044 & 10.56 & % outer - 0.0061 & 76.8 & 7.3 \\ % Kokkos + 0.990 & 0.47 & % MATLAB + 0.318 & 1.47 & % dot + 0.052 & 9.04 & % outer + 0.0061 & 76.8 & 8.5 \\ % Kokkos %------------------------------------ SNAP/soc-Slashdot0902 & % 82,168 & 504,230 & 602,592 & - 0.856 & 0.59 & % MATLAB - 0.308 & 1.64 & % dot - 0.047 & 10.70 & % outer - 0.0063 & 80.0 & 7.6 \\ % Kokkos + 0.985 & 0.51 & % MATLAB + 0.339 & 1.49 & % dot + 0.059 & 8.61 & % outer + 0.0063 & 80.1 & 9.3 \\ % Kokkos %------------------------------------ SNAP/amazon0312 & % 400,727 & 2,349,869 & 3,686,467 & - 0.727 & 3.23 & % MATLAB - 0.436 & 5.39 & % dot - 0.250 & 9.40 & % outer - 0.0754 & 31.2 & 3.3 \\ % Kokkos + 1.285 & 1.83 & % MATLAB + 0.514 & 5.32 & % dot + 0.306 & 8.61 & % outer + 0.0754 & 30.7 & 3.6 \\ % Kokkos %------------------------------------ SNAP/amazon0505 & % 410,236 & 2,439,437 & 3,951,063 & - 0.819 & 2.98 & % MATLAB - 0.460 & 5.31 & % dot - 0.254 & 9.59 & % outer - 0.0177 & 137.8 & 14.4 \\ % Kokkos + 1.018 & 2.07 & % MATLAB + 0.545 & 4.48 & % dot + 0.297 & 8.21 & % outer + 0.0177 & 133.0 & 16.2 \\ % Kokkos %------------------------------------ SNAP/amazon0601 & % 403,394 & 2,443,408 & 3,986,507 & - 0.741 & 3.30 & % MATLAB - 0.461 & 5.39 & % dot - 0.255 & 9.59 & % outer - 0.0184 & 132.8 & 13.8 \\ % Kokkos + 1.018 & 2.40 & % MATLAB + 0.563 & 4.34 & % dot + 0.296 & 8.27 & % outer + 0.0184 & 132.0 & 16.0 \\ % Kokkos %------------------------------------ SNAP/cit-Patents & % 3,774,768 & 16,518,947 & 7,515,023 & - 11.382 & 1.45 & % MATLAB - 3.676 & 4.49 & % dot - 2.042 & 8.09 & % outer - 0.4970 & 33.2 & 4.1 \\ % Kokkos + 11.026 & 1.50 & % MATLAB + 4.416 & 3.74 & % dot + 2.300 & 7.18 & % outer + 0.4970 & 31.5 & 4.4 \\ % 
Kokkos %------------------------------------ SNAP/soc-LiveJournal1 & % 4,847,571 & 42,851,237 & 285,730,264 & - mem & & % MATLAB - 38.253 & 1.12 & % dot - 9.671 & 4.43 & % outer - 0.7330 & 58.5 & 13.2 \\ % Kokkos + 11.026 & 0.40 & % MATLAB + 39.767 & 1.08 & % dot + 10.123 & 4.23 & % outer + 0.7330 & 58.5 & 13.8 \\ % Kokkos %------------------------------------ Gleich/wb-edu & % 9,845,725 & 46,236,105 & 254,718,147 & - mem & & % MATLAB - 6.720 & 6.88 & % dot - 3.097 & 14.93 & % outer - 0.2320 & 199.3 & 13.3 \\ % Kokkos + 67.636 & 0.68 & % MATLAB + 8.016 & 5.77 & % dot + 3.605 & 12.82 & % outer + 0.2320 & 199.0 & 15.5 \\ % Kokkos %------------------------------------ \hline %------------------------------------ SNAP/p2p-Gnutella09 & - 0.010 & 2.55 & % MATLAB - 0.002 & 13.29 & % dot - 0.001 & 32.80 & % outer + 0.004 & 6.50 & % MATLAB + 0.002 & 10.65 & % dot + 0.001 & 24.24 & % outer & & \\ % Kokkos %------------------------------------ Mallya/lhr71 & % 70,304 & 1,492,794 & 160,592 & - 0.285 & 5.25 & % MATLAB - 0.017 & 89.33 & % C=L'*U - 0.017 & 86.84 & % C=U*U + 0.252 & 5.93 & % MATLAB + 0.058 & 25.90 & % C=L'*U + 0.030 & 50.37 & % C=U*U & & \\ % Kokkos %------------------------------------ Freescale/Freescale2 & % 2,999,349 & 5,744,934 & 21,027,280 \\ - 0.486 & 11.82 & % MATLAB - 0.293 & 19.60 & % dot - 0.180 & 31.90 & % outer + 0.741 & 7.75 & % MATLAB + 0.501 & 11.46 & % dot + 0.276 & 20.83 & % outer & & \\ % Kokkos %------------------------------------ Freescale/circuit5M & % 5,558,326 & 26,983,926 & 31,019,473 & mem & & % MATLAB - 2.091 & 12.91 & % dot - 193.375 & 0.14 & % outer + 2.819 & 9.57 & % dot + 194.142 & 0.14 & % outer & & \\ % Kokkos %------------------------------------ DIMACS10/hugebubbles-00020 & % 21,198,119 & 31,790,179 & 0 & - 6.824 & 4.66 & % MATLAB - 2.026 & 15.69 & % dot - 5.963 & 5.33 & % outer + 7.406 & 4.29 & % MATLAB + 3.417 & 9.30 & % dot + 6.568 & 4.84 & % outer & & \\ % Kokkos %------------------------------------ vanHeukelum/cage15 & % 5,154,859 & 47,022,346 & 36,106,416 & - 8.836 & 5.32 & % MATLAB - 3.078 & 15.28 & % dot - 2.087 & 22.53 & % outer + 10.187 & 4.62 & % MATLAB + 4.407 & 10.67 & % dot + 2.443 & 19.25 & % outer & & \\ % Kokkos %------------------------------------ \hline @@ -6903,33 +6927,59 @@ \subsection{Triangle counting} {\footnotesize \begin{verbatim} int64_t ntriangles ; - GrB_Index n ; - GrB_Matrix C ; - GrB_Matrix_nrows (&n, U) ; + GrB_Index n, one = 1 ; + GrB_Matrix C, U ; + GrB_Matrix_nrows (&n, A) ; + + // U = triu (A, 1) + GrB_Matrix_new (&U, GrB_UINT32, n, n) ; + GxB_select (U, NULL, NULL, GxB_TRIU, A, &one, NULL) ; + + // C = U*U GrB_Matrix_new (&C, GrB_UINT32, n, n) ; GrB_mxm (C, U, NULL, GxB_PLUS_TIMES_UINT32, U, U, NULL) ; + + // ntriangles = sum (C) GrB_reduce (&ntriangles, NULL, GxB_PLUS_INT64_MONOID, C, NULL) ; + GrB_free (&C) ; + GrB_free (&U) ; \end{verbatim} } +\newpage The dot product method -${ \bf C \langle U \rangle} = {\bf L}^T{\bf U}$ -in GraphBLAS is: +${ \bf C \langle U \rangle} = {\bf L}'{\bf U}$ +in GraphBLAS is similar: {\footnotesize \begin{verbatim} int64_t ntriangles ; - GrB_Index n ; - GrB_Matrix C ; - GrB_Matrix_nrows (&n, U) ; + GrB_Index n, one = 1, minusone = -1 ; + GrB_Matrix C, L, U ; + GrB_Matrix_nrows (&n, A) ; + + // U = triu (A, 1) + GrB_Matrix_new (&U, GrB_UINT32, n, n) ; + GxB_select (U, NULL, NULL, GxB_TRIU, A, &one, NULL) ; + + // L = tril (A,-1) + GrB_Matrix_new (&L, GrB_UINT32, n, n) ; + GxB_select (L, NULL, NULL, GxB_TRIL, A, &minusone, NULL) ; + + // C = L'*U GrB_Matrix_new (&C, GrB_UINT32, n, 
n) ; GrB_Descriptor_new (&d) ; GrB_Descriptor_set (d, GrB_INP0, GrB_TRAN) ; GrB_mxm (C, U, NULL, GxB_PLUS_TIMES_UINT32, L, U, d) ; + GrB_free (&d) ; + + // ntriangles = sum (C) GrB_reduce (&ntriangles, NULL, GxB_PLUS_INT64_MONOID, C, NULL) ; + GrB_free (&C) ; - GrB_free (&d) ; + GrB_free (&L) ; + GrB_free (&U) ; \end{verbatim} } @@ -7010,19 +7060,19 @@ \section{Installing SuiteSparse:GraphBLAS} \verb'icc' compiler, version 18.0 or later is required. Version 2.8.12 or later of \verb'cmake' is required; version 3.0.0 is preferred. -To compile SuiteSparse:GraphBLAS and the demo programs: +To compile SuiteSparse:GraphBLAS and the demo programs, simply type \verb'make' +in the main GraphBLAS folder, which compiles the library and runs several +demos. - {\small - \begin{verbatim} - cd build - cmake .. - make \end{verbatim} } - -{\bf Please be patient. Some files take very long time to compile.} +GraphBLAS is not yet parallel, but it is thread-safe if multiple simultaneous +calls are made to GraphBLAS functions. For this usage, GraphBLAS must be +compiled with OpenMP so that GraphBLAS has access to a critical section +mechanism. OpenMP is optional if the user application does not make +multiple simultaneous calls to GraphBLAS. If \verb'cmake' or \verb'make' fail, it might be that your default compiler does not support ANSI C11. Try another compiler. For example, try one of -these options: +these options. Go into the \verb'build' directory and type: {\small \begin{verbatim} @@ -7031,10 +7081,11 @@ \section{Installing SuiteSparse:GraphBLAS} CC=xlc cmake .. CC=icc cmake .. \end{verbatim} } -If this still fails, see the \verb'GraphBLAS/CMakeLists.txt' file. You may -need to pass compiler-specific options to your compiler. Locate this section -in the \verb'CMakeLists.txt' file. Use the \verb'set' command in \verb'cmake', -as in the example below, to set the compiler flags you need. +Then do \verb'make' in the \verb'build' directory. If this still fails, see +the \verb'CMakeLists.txt' file. You may need to pass compiler-specific options +to your compiler. Locate this section in the \verb'CMakeLists.txt' file. Use +the \verb'set' command in \verb'cmake', as in the example below, to set the +compiler flags you need. {\small \begin{verbatim} @@ -7043,7 +7094,7 @@ \section{Installing SuiteSparse:GraphBLAS} if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") # cmake 2.8 workaround: gcc needs to be told to do ANSI C11. # cmake 3.0 doesn't have this problem. - set (CMAKE_C_FLAGS "-std=c11 -lm") + set (CMAKE_C_FLAGS "-std=c11 -lm -fopenmp") ... elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") ... @@ -7066,42 +7117,46 @@ \section{Installing SuiteSparse:GraphBLAS} matrices in the \verb'Demo/Matrix' folder. The output of the demos will be compared with expected output files in \verb'Demo/Output'. -To install the library in \verb'/usr/local/lib' and \verb'/usr/local/include': +To install the library in \verb'/usr/local/lib' and \verb'/usr/local/include', +go to the top-level GraphBLAS folder and type: {\small \begin{verbatim} - cd build sudo make install \end{verbatim} } -SuiteSparse:GraphBLAS creates 960 hard-coded versions of \verb'C=A*B', one for -each possible semiring that can be constructed from built-in types, operators, -and monoids (see Section~\ref{semiring_new}). To compile faster without these -960 versions, edit \verb'Source/GB.h' and uncomment \verb'#define GBCOMPACT'. -GraphBLAS will still be able to compute \verb'C=A*B' for any valid semiring, -but it will be slower for built-in semirings. 
Alternatively, edit the -\verb'CMakeLists.txt' and change the \verb'CMAKE_BUILD_TYPE' to \verb'Debug', -which enables the 960 built-in semirings but compiles with no optimization. -These options are not recommended for end-users and are intended only for code -development of GraphBLAS itself. - -Several other compile-time options can be selected by editing the -\verb'Source/GB.h' file, but these are meant only for code development of -SuiteSparse:GraphBLAS itself, not for end-users of SuiteSparse:GraphBLAS. +Several compile-time options can be selected by editing the \verb'Source/GB.h' +file, but these are meant only for code development of SuiteSparse:GraphBLAS +itself, not for end-users of SuiteSparse:GraphBLAS. To perform the extensive tests in the \verb'Test' folder, and the statement coverage tests in \verb'Tcov', MATLAB R2017A is required. See the \verb'README.txt' files in those two folders for instructions on how to run the tests. -To remove all compiled files: - - {\small - \begin{verbatim} - cd build ; rm -rf * \end{verbatim} } +To remove all compiled files, type \verb'make' \verb'distclean' in the top-level +GraphBLAS folder. {\bf NOTE: SuiteSparse:GraphBLAS has not yet been ported to Windows.} However, with \verb'cmake' the port to Windows should be straight-forward -(in progress). +(this is in progress). + +\newpage +%------------------------------------------------------------------------------- +\section{Acknowledgements} +%------------------------------------------------------------------------------- + +I would like to thank Jeremy Kepner (MIT Lincoln Laboratory Supercomputing +Center), and the GraphBLAS API Committee: Ayd\i n Bulu\c{c} (Lawrence Berkeley +National Laboratory), Timothy G. Mattson (Intel Corporation) Scott McMillan +(Software Engineering Institute at Carnegie Mellon University), Jos\'e Moreira +(IBM Corporation), and Carl Yang (UC Davis), for creating the GraphBLAS +specification and for patiently answering my many questions while I was +implementing it. + +I would also like to thank S\'ebastien Villemot (Debian Developer, +\url{http://sebastien.villemot.name}) for helping me with various build issues +and other code issues with GraphBLAS (and all of SuiteSparse) for its packaging +in Debian Linux. \newpage %------------------------------------------------------------------------------- @@ -7110,7 +7165,7 @@ \section{Installing SuiteSparse:GraphBLAS} {\small \addcontentsline{toc}{section}{References} \bibliographystyle{annotate} -\bibliography{GraphBLAS_userguide.bib} +\bibliography{GraphBLAS_UserGuide.bib} } \end{document} diff --git a/GraphBLAS/Doc/Makefile b/GraphBLAS/Doc/Makefile index 5a25447f36..2de9fa5154 100644 --- a/GraphBLAS/Doc/Makefile +++ b/GraphBLAS/Doc/Makefile @@ -1,3 +1,14 @@ +#------------------------------------------------------------------------------- +# GraphBLAS/Doc/Makefile +#------------------------------------------------------------------------------- + +# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +# http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
+ +#------------------------------------------------------------------------------- + +# creates the user guide + GraphBLAS_UserGuide.pdf: GraphBLAS_UserGuide.tex GraphBLAS_UserGuide.bib pdflatex GraphBLAS_UserGuide.tex bibtex GraphBLAS_UserGuide diff --git a/GraphBLAS/Include/GraphBLAS.h b/GraphBLAS/Include/GraphBLAS.h index 45a90fa276..0bbf23cbb7 100644 --- a/GraphBLAS/Include/GraphBLAS.h +++ b/GraphBLAS/Include/GraphBLAS.h @@ -16,19 +16,29 @@ // mathematics of sparse matrix operations on a semiring. // This GraphBLAS.h file contains GraphBLAS definitions for user applications -// to #include. Functions and variables with GB_ or _opaque in their name need -// to be defined in this file and are thus technically visible to the user, but -// they must not be not accessed in user code. They are not guaranteed to be -// present in all implementations of GraphBLAS. +// to #include. Functions and variables with the prefix GB_ need to be defined +// in this file and are thus technically visible to the user, but they must not +// be accessed in user code. They are here only so that the ANSI C11 _Generic +// feature can be used in the user-accessible polymorphic functions. For +// example GrB_free is a macro that uses _Generic to select the right method, +// depending on the type of its argument. // The GraphBLAS API Specification 1.1.0 is provisional, but this -// implementation fully conforms to that specificatin. It does include -// functions and features that are extensions to the spec. These are cataloged -// here and tagged with "SPEC". +// implementation fully conforms to that specification. This implementation +// does include functions and features that are extensions to the spec. These +// are cataloged here and tagged with "SPEC." // All functions and definitions that are extensions to the spec are given // names of the form GxB_* for functions and built-in objects, or GXB_ for // macros, so it is clear which are in the spec and which are extensions. +// Extensions with the name GxB_* or GXB_* are user-accessible in +// SuiteSparse:GraphBLAS but cannot be guaranteed to appear in all GraphBLAS +// implementations. In the future, if any GxB_* functions are included as-is +// in the GraphBLAS API spec with GrB_* names, the prior GxB_* variants that +// appear here will be kept for backward compatibility. If they must change +// for inclusion in the spec, a reasonable attempt will be made to keep the +// prior GxB_* variant alongside the GrB_* version, also for backward +// compatibility. #ifndef GRAPHBLAS_H #define GRAPHBLAS_H @@ -61,16 +71,10 @@ #define GXB_VERSION(major,minor,sub) \ (((major)*1000ULL + (minor))*1000ULL + (sub)) -// This implementation conforms to the GraphBLAS provisional release 1.1.0 -#define GXB_MAJOR 1 -#define GXB_MINOR 1 -#define GXB_SUB 0 -#define GXB GXB_VERSION(GXB_MAJOR, GXB_MINOR, GXB_SUB) - // The version of this implementation: #define GXB_IMPLEMENTATION_MAJOR 1 #define GXB_IMPLEMENTATION_MINOR 1 -#define GXB_IMPLEMENTATION_SUB 0 +#define GXB_IMPLEMENTATION_SUB 2 #define GXB_IMPLEMENTATION \ GXB_VERSION (GXB_IMPLEMENTATION_MAJOR, \ GXB_IMPLEMENTATION_MINOR, \ @@ -82,16 +86,7 @@ "http://suitesparse.com Dept of Computer Sci. & Eng, Texas A&M University\n" // and its date: -#define GXB_DATE "Dec 1, 2017" - -// The 'spec' string describes the GraphBLAS spec: -#define GXB_SPEC \ -"GraphBLAS C API, provisional release, by Aydin Buluc, Timothy\n" \ -"Mattson, Scott McMillan, Jose' Moreira, Carl Yang. 
Based on\n" \ -"\"GraphBLAS Mathematics\" by Jeremy Kepner.\n" - -// and its date: -#define GXB_SPEC_DATE "Oct 10, 2017" +#define GXB_DATE "Dec 28, 2017" // The GraphBLAS license for this particular implementation of GraphBLAS: #define GXB_LICENSE \ @@ -109,6 +104,25 @@ "See the License for the specific language governing permissions and\n" \ "limitations under the License.\n" +//------------------------------------------------------------------------------ +// GraphBLAS C API version +//------------------------------------------------------------------------------ + +// This implementation conforms to the GraphBLAS provisional release 1.1.0 +#define GXB_MAJOR 1 +#define GXB_MINOR 1 +#define GXB_SUB 0 +#define GXB GXB_VERSION(GXB_MAJOR, GXB_MINOR, GXB_SUB) + +// The 'spec' string describes the GraphBLAS spec: +#define GXB_SPEC \ +"GraphBLAS C API, provisional release, by Aydin Buluc, Timothy\n" \ +"Mattson, Scott McMillan, Jose' Moreira, Carl Yang. Based on\n" \ +"\"GraphBLAS Mathematics\" by Jeremy Kepner.\n" + +// and its date: +#define GXB_SPEC_DATE "Oct 10, 2017" + //------------------------------------------------------------------------------ // include files required by GraphBLAS //------------------------------------------------------------------------------ @@ -283,6 +297,7 @@ const char *GrB_error ( ) ; // return a string describing the last error // also define new types based on any typedef in the C language whose values // are held in a contiguous region of memory. +// USER CODE SHOULD NOT RELY ON GB_LEN #define GB_LEN 128 typedef struct @@ -294,7 +309,7 @@ typedef struct } GB_Type_opaque ; // CONTENT NOT USER-ACCESSIBLE -// The GrB_Type handle user-accessible, but GB_Type_opaque is not: +// The GrB_Type handle is user-accessible, but GB_Type_opaque is not: typedef GB_Type_opaque *GrB_Type ; // GraphBLAS predefined types and the counterparts in pure C and in MATLAB @@ -323,7 +338,7 @@ GrB_Info GrB_Type_new // create a new GraphBLAS type ) ; */ - +// USER CODE SHOULD NOT RELY ON GB_STR OR GB_XSTR // GB_STR: convert the content of x into a string "x" #define GB_XSTR(x) GB_STR(x) #define GB_STR(x) #x @@ -334,7 +349,7 @@ GrB_Info GrB_Type_new // create a new GraphBLAS type // This function is not user-callable; use GrB_Type_new instead -GrB_Info GB_Type_new // create a new GraphBLAS type +GrB_Info GB_Type_new // USER CODE SHOULD NOT USE THIS FUNCTION DIRECTLY ( GrB_Type *type, // handle of user type to create const size_t size, // size of the user type @@ -466,7 +481,7 @@ GrB_Info GrB_UnaryOp_new // create a new user-defined unary operator // This function is NOT user-callable: -GrB_Info GB_UnaryOp_new // create a new user-defined unary operator +GrB_Info GB_UnaryOp_new // USER CODE SHOULD NOT USE THIS FUNCTION DIRECTLY ( GrB_UnaryOp *unaryop, // handle for the new unary operator void *function, // pointer to the unary function @@ -763,7 +778,7 @@ GrB_Info GrB_BinaryOp_new // This function is NOT user-callable: -GrB_Info GB_BinaryOp_new +GrB_Info GB_BinaryOp_new // USER CODE SHOULD NOT USE THIS FUNCTION DIRECTLY ( GrB_BinaryOp *binaryop, // handle for the new binary operator void *function, // pointer to the binary function @@ -858,7 +873,7 @@ extern GxB_SelectOp // for any built-in or user-defined type // For GxB_TRIL, GxB_TRIU, GxB_DIAG, and GxB_OFFDIAG, the parameter k is a -// const void * pointer to a single scalar value of type GrB_Index. These +// const void * pointer to a single scalar value of type int64_t. 
These // select operators do not depend on the values of A, but just their position. // For GxB_NONZERO, the result depends only on the value of A(i,j), and the k @@ -890,7 +905,7 @@ GrB_Info GxB_SelectOp_new // create a new user-defined select operator // This function is NOT user-callable: -GrB_Info GB_SelectOp_new // create a new user-defined select operator +GrB_Info GB_SelectOp_new // USER CODE SHOULD NOT USE THIS FUNCTION DIRECTLY ( GxB_SelectOp *selectop, // handle for the new select operator void *function, // pointer to the select function @@ -3514,12 +3529,12 @@ GrB_Info GxB_Matrix_subassign_UDT // C(I,J) = accum (C(I,J),x) // GxB_subassign is a generic function that provides access to all specific // GxB_*_subassign* functions: -// GxB_Vector_subassign (w,mask,acc,u,I,ni,d) // w(I) =acc(w(I),u) -// GxB_Matrix_subassign (C,Mask,acc,A,I,ni,J,nj,d)// C(I,J) =acc(C(I,J),A) -// GxB_Col_subassign (C,mask,acc,u,I,ni,j,d) // C(I,j) =acc(C(I,j),u) -// GxB_Row_subassign (C,mask,acc,u,i,J,nj,d) // C(i,J)=acc(C(i,J),u') -// GxB_Vector_subassign_T (w,mask,acc,x,I,ni,d) // w(I) =acc(w(I),x) -// GxB_Matrix_subassign_T (C,Mask,acc,x,I,ni,J,nj,d)// C(I,J) =acc(C(I,J),x) +// GxB_Vector_subassign (w,m,acc,u,I,ni,d) // w(I) =acc(w(I),u) +// GxB_Matrix_subassign (C,M,acc,A,I,ni,J,nj,d)// C(I,J) =acc(C(I,J),A) +// GxB_Col_subassign (C,m,acc,u,I,ni,j,d) // C(I,j) =acc(C(I,j),u) +// GxB_Row_subassign (C,m,acc,u,i,J,nj,d) // C(i,J)=acc(C(i,J),u') +// GxB_Vector_subassign_T (w,m,acc,x,I,ni,d) // w(I) =acc(w(I),x) +// GxB_Matrix_subassign_T (C,M,acc,x,I,ni,J,nj,d)// C(I,J) =acc(C(I,J),x) #define GxB_subassign(arg1,Mask,accum,arg4,arg5,...) \ _Generic \ @@ -4149,7 +4164,7 @@ GrB_Info GxB_Matrix_select // C = accum (C, op(A,k)) or op(A',k) // GxB_select: generic matrix/vector select //------------------------------------------------------------------------------ -// GrB_select is a generic function for applying a select operator to a matrix +// GxB_select is a generic function for applying a select operator to a matrix // or vector and provides access to these functions: // GrB_Vector_select (w,mask,acc,op,u,k,d) // w = accum (w, op(u,k)) @@ -4970,5 +4985,24 @@ GxB_LOR_LT_BOOL , GxB_LAND_LT_BOOL , GxB_LXOR_LT_BOOL , GxB_E GxB_LOR_GE_BOOL , GxB_LAND_GE_BOOL , GxB_LXOR_GE_BOOL , GxB_EQ_GE_BOOL , GxB_LOR_LE_BOOL , GxB_LAND_LE_BOOL , GxB_LXOR_LE_BOOL , GxB_EQ_LE_BOOL ; +//------------------------------------------------------------------------------ +// GxB_stats: memory usage and other statistics +//------------------------------------------------------------------------------ + +typedef struct +{ + int64_t nmalloc ; // # of objects malloc'ed but not yet freed + int64_t inuse ; // memory in use (in bytes) + int64_t maxused ; // max memory used since last call to GxB_stats + int64_t future [20] ; // not used, reserved for future use + double xfuture [20] ; // not used, reserved for future use +} +GxB_Statistics ; + +GrB_Info GxB_stats +( + GxB_Statistics *stats +) ; + #endif diff --git a/GraphBLAS/Makefile b/GraphBLAS/Makefile index 88d131af3a..d678cdd502 100644 --- a/GraphBLAS/Makefile +++ b/GraphBLAS/Makefile @@ -1,20 +1,47 @@ -# simple Makefile for GraphBLAS, relies on cmake +#------------------------------------------------------------------------------- +# GraphBLAS/Makefile +#------------------------------------------------------------------------------- +# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +# http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
+ +#------------------------------------------------------------------------------- + +# simple Makefile for GraphBLAS, relies on cmake to do the actual build. Use +# the CMAKE_OPTIONS argument to this Makefile to pass options to cmake. + +# build the GraphBLAS library (static and dynamic) and run a quick test default: - ( cd build ; cmake .. ; make ; cd ../Demo ; ./demo ) + ( cd build ; cmake $(CMAKE_OPTIONS) .. ; $(MAKE) ; cd ../Demo ; ./demo ) +# just build the static and dynamic libraries; do not run the demo library: - ( cd build ; cmake .. ; make ) + ( cd build ; cmake $(CMAKE_OPTIONS) .. ; $(MAKE) ) + +# the same as "make library" +static: library +# installs GraphBLAS to the install location defined by cmake, usually +# /usr/local/lib and /usr/local/include install: - ( cd build ; cmake .. ; make ; make install ) + ( cd build ; cmake $(CMAKE_OPTIONS) .. ; $(MAKE) ; $(MAKE) install ) + +# create the Doc/GraphBLAS_UserGuide.pdf +docs: + ( cd Doc ; $(MAKE) ) + +# remove any installed libraries and #include files +uninstall: + - xargs rm < build/install_manifest.txt clean: distclean purge: distclean +# remove all files not in the distribution distclean: - rm -rf build/* Demo/*_demo.out Demo/complex_demo_out.m - ( cd Test ; make distclean ) - ( cd Tcov ; make distclean ) - ( cd Doc ; make distclean ) + rm -rf build/* Demo/*_demo.out Demo/complex_demo_out.m Tcov/log.txt + ( cd Test ; $(MAKE) distclean ) + ( cd Tcov ; $(MAKE) distclean ) + ( cd Doc ; $(MAKE) distclean ) + diff --git a/GraphBLAS/README.txt b/GraphBLAS/README.txt index d7faa14ada..f862ebbe54 100644 --- a/GraphBLAS/README.txt +++ b/GraphBLAS/README.txt @@ -1,7 +1,7 @@ SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. http://suitesparse.com See GraphBLAS/Doc/License.txt for license. -VERSION 1.1.0, Dec 1, 2017 +VERSION 1.1.2, Dec 28, 2017 SuiteSparse:GraphBLAS is an full implementation of the GraphBLAS standard, which defines a set of sparse matrix operations on an extended algebra of @@ -72,8 +72,11 @@ build build directory, intially empty -------------------------------------------------------------------------------- -SPEC: This version fully conforms to GraphBLAS C API Specification 1.1.0. -It includes several additional functions and features as extensions to the -spec. These extensions are tagged with the keyword SPEC: in the code and in -the User Guide, and in the Include/GraphBLAS.h file. +SPEC: This version fully conforms to GraphBLAS C API Specification 1.1.0. It +includes several additional functions and features as extensions to the spec. +These extensions are tagged with the keyword SPEC: in the code and in the User +Guide, and in the Include/GraphBLAS.h file. All functions and objects with the +name GxB_* are extensions to the spec, as are all macros of the form GXB_*. +Functions, objects, and macros with prefix GB_* must not be accessed by user +code. They are for internal use in GraphBLAS only. 
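As an illustration of calling one of these GxB_* extensions from user code, a minimal sketch using the GxB_stats function and GxB_Statistics struct declared in Include/GraphBLAS.h above (it assumes GrB_init has already been called, and that stdio.h and inttypes.h are included for printing):

    GxB_Statistics stats ;
    if (GxB_stats (&stats) == GrB_SUCCESS)
    {
        printf ("blocks malloc'd but not freed: %" PRId64 "\n", stats.nmalloc) ;
        printf ("bytes currently in use:        %" PRId64 "\n", stats.inuse) ;
        printf ("peak bytes used:               %" PRId64 "\n", stats.maxused) ;
    }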
diff --git a/GraphBLAS/Source/GB.h b/GraphBLAS/Source/GB.h index 50c6c04b45..e11488541d 100644 --- a/GraphBLAS/Source/GB.h +++ b/GraphBLAS/Source/GB.h @@ -210,7 +210,12 @@ GrB_Info GB_Vector_check // check a GraphBLAS vector // debugging enabled #ifdef MATLAB_MEX_FILE #define ASSERT(x) \ - if (!(x)) mexErrMsgTxt ("failure: " __FILE__ " line: " GB_XSTR(__LINE__)) ; + { \ + if (!(x)) \ + { \ + mexErrMsgTxt ("failure: " __FILE__ " line: " GB_XSTR(__LINE__)) ; \ + } \ + } #else #include #define ASSERT(x) assert (x) ; @@ -723,9 +728,11 @@ void *GB_realloc_memory // pointer to reallocated block of memory, or bool *ok // true if successful, false otherwise ) ; -void GB_free_memory // pointer to allocated block of memory to free +void GB_free_memory ( - void *p // only free p is if is not NULL + void *p, // pointer to allocated block of memory to free + size_t nitems, // number of items to free + size_t size_of_item // sizeof each item ) ; //------------------------------------------------------------------------------ @@ -741,7 +748,7 @@ void GB_free_memory // pointer to allocated block of memory to free { \ printf ("\nmatrix new: " \ "%s = new (%s, %s = "GBd", %s = "GBd", %d, %d) line %d file %s\n", \ - GB_STR(A), GB_STR(type), GB_STR(nrows), GB_STR (nrows), GB_STR(ncols), \ + GB_STR(A), GB_STR(type), GB_STR(nrows), nrows, GB_STR(ncols), \ ncols, Ap_calloc, Ap_malloc, __LINE__, __FILE__) ; \ info = GB_new (A, type, nrows, ncols, Ap_calloc, Ap_malloc) ; \ } @@ -785,12 +792,14 @@ void GB_free_memory // pointer to allocated block of memory to free p = GB_realloc_memory (nnew, nold, s, p, ok) ; \ } -#define GB_FREE_MEMORY(p) \ +#define GB_FREE_MEMORY(p,n,s) \ { \ if (p) \ - printf ("\nfree: " \ - "free (%s) line %d file %s\n", GB_STR(p), __LINE__,__FILE__) ; \ - GB_free_memory (p) ; \ + printf ("\nfree: %14p " \ + "(%s, %s = "GBd", %s = "GBd") line %d file %s\n", \ + p, GB_STR(p), GB_STR(n), (int64_t) n, GB_STR(s), (int64_t) s, \ + __LINE__,__FILE__) ; \ + GB_free_memory (p, n, s) ; \ (p) = NULL ; \ } @@ -822,9 +831,9 @@ void GB_free_memory // pointer to allocated block of memory to free p = GB_realloc_memory (nnew, nold, s, p, ok) ; \ } -#define GB_FREE_MEMORY(p) \ +#define GB_FREE_MEMORY(p,n,s) \ { \ - GB_free_memory (p) ; \ + GB_free_memory (p, n, s) ; \ (p) = NULL ; \ } @@ -906,7 +915,6 @@ bool GB_AxB_builtin // true if C=A*B is handled const GrB_Matrix Mask, // Mask matrix for C (not complemented) const GrB_Matrix A, // input matrix const GrB_Matrix B, // input matrix - void *work, // workspace of size A->nrows == C->nrows const GrB_Semiring semiring, // semiring that defines C=A*B const bool flipxy // if true, do z=fmult(b,a) vs fmult(a,b) ) ; @@ -1167,6 +1175,7 @@ GrB_Info GB_builder const bool already_sorted, // true if tuples already sorted on input const void *X, // array of values of tuples const int64_t len, // number of tuples + const int64_t ijlen, // size of i,j work arrays const GrB_BinaryOp dup, // binary function to assemble duplicates, // if NULL use the "SECOND" function to // keep the most recent duplicate. 
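The new three-argument form of GB_free_memory and GB_FREE_MEMORY in the hunk above pairs each free with the same item count and size that were passed at allocation time, which appears to be what lets the library maintain the byte-level usage counters added to GB_Global_struct below. A minimal sketch of a call site (the pointer W and count n here are hypothetical):

    int64_t *W = NULL ;
    GB_MALLOC_MEMORY (W, n, sizeof (int64_t)) ;
    // ... use W ...
    GB_FREE_MEMORY (W, n, sizeof (int64_t)) ;   // formerly GB_FREE_MEMORY (W)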
@@ -1179,7 +1188,8 @@ GrB_Info GB_build_factory // build a matrix int64_t **iwork_handle, // for (i,k) or (j,i,k) tuples int64_t **kwork_handle, // for (i,k) or (j,i,k) tuples const void *X, // array of values of tuples - const int64_t len, // number of tuples + const int64_t len, // number of tuples and size of kwork + const int64_t ilen, // size of iwork array const GrB_BinaryOp dup, // binary function to assemble duplicates, // if NULL use the "SECOND" function to // keep the most recent duplicate. @@ -1205,11 +1215,6 @@ void GB_free_pending // free all pending tuples GrB_Matrix A // matrix with pending tuples to free ) ; -void GB_queue_init -( - const GrB_Mode mode // blocking or non-blocking mode -) ; - void GB_queue_remove // remove matrix from queue ( GrB_Matrix A // matrix to remove @@ -1400,6 +1405,34 @@ typedef struct GrB_Mode mode ; // GrB_NONBLOCKING or GrB_BLOCKING + //-------------------------------------------------------------------------- + // malloc tracking + //-------------------------------------------------------------------------- + + // nmalloc: To aid in searching for memory leaks, GraphBLAS keeps track of + // the number of blocks of allocated by malloc, calloc, or realloc that + // have not yet been freed. The count starts at zero. malloc and calloc + // increment this count, and free (of a non-NULL pointer) decrements it. + // realloc increments the count it if is allocating a new block, but it + // does this by calling GB_malloc_memory. + + // inuse: the # of bytes currently in use by all threads + + // maxused: the max value of inuse since the call to GrB_init + + // malloc_debug: this is used for testing only (GraphBLAS/Tcov). If true, + // then use malloc_debug_count for testing memory allocation and + // out-of-memory conditions. If malloc_debug_count > 0, the value is + // decremented after each allocation of memory. If malloc_debug_count <= + // 0, the GB_*_memory routines pretend to fail; returning NULL and not + // allocating anything. + + int64_t nmalloc ; // number of blocks allocated but not freed + bool malloc_debug ; // if true, test memory hanlding + int64_t malloc_debug_count ; // for testing memory handling + int64_t inuse ; // memory space current in use + int64_t maxused ; // high water memory usage + } GB_Global_struct ; @@ -1414,7 +1447,8 @@ extern GB_Global_struct GB_Global ; // each thread that calls GraphBLAS needs its own private copy of these // variables. -#define GB_RLEN 2048 +#define GB_RLEN 3000 +#define GB_DLEN 2048 typedef struct { @@ -1429,7 +1463,7 @@ typedef struct const char *where ; // GraphBLAS function where error occurred const char *file ; // GraphBLAS filename where error occured int line ; // line in the GraphBLAS file of error - char details [GB_RLEN+1] ; // string with details of the error + char details [GB_DLEN+1] ; // string with details of the error char report [GB_RLEN+1] ; // string returned by GrB_error //-------------------------------------------------------------------------- @@ -1453,31 +1487,6 @@ typedef struct int8_t *Flag ; // initialized space int64_t Flag_size ; // current size of Flag array - //-------------------------------------------------------------------------- - // malloc tracking - //-------------------------------------------------------------------------- - - // This is useful only for testing and development of GraphBLAS, not for - // end user applications. Using nmalloc assumes the application that uses - // GraphBLAS is single-threaded. It is not meant to be thread-safe. 
- - // nmalloc: To aid in searching for memory leaks, GraphBLAS keeps track of - // the number of blocks of allocated by malloc, calloc, or realloc that - // have not yet been freed. The count starts at zero. malloc and calloc - // increment this count, and free (of a non-NULL pointer) decrements it. - // realloc increments the count it if is allocating a new block, but it - // does this by calling GB_malloc_memory. - - // malloc_debug: if true, then use malloc_debug_count for testing memory - // allocation and out-of-memory conditions. If malloc_debug_count > 0, the - // value is decremented after each allocation of memory. If - // malloc_debug_count <= 0, the GB_*_memory routines pretend to fail; - // returning NULL and not allocating anything. - - int64_t nmalloc ; // number of blocks allocated but not freed - bool malloc_debug ; // if true, test memory hanlding - int64_t malloc_debug_count ; // for testing memory handling - //-------------------------------------------------------------------------- // random seed for GB_rand //-------------------------------------------------------------------------- @@ -1536,7 +1545,7 @@ static inline GrB_Index GB_rand ( ) // details ("Row index 102 out of bounds, must be < 100"), and finally the // exact GraphBLAS filename and line number where the error was caught. -#define LOG GB_thread_local.details, GB_RLEN +#define LOG GB_thread_local.details, GB_DLEN #define ERROR(f,s) \ ( \ snprintf s , \ @@ -1722,7 +1731,9 @@ void GB_Flag_free ( ) ; // free the Flag array #define EMPTY (-1) #define FLIP(i) (-(i)-2) #define IS_FLIPPED(i) ((i) < 0) +#define IS_ZOMBIE(i) ((i) < 0) #define IS_NOT_FLIPPED(i) ((i) >= 0) +#define IS_NOT_ZOMBIE(i) ((i) >= 0) #define UNFLIP(i) (((i) < 0) ? FLIP(i) : (i)) // true if a matrix has pending tuples @@ -2145,7 +2156,7 @@ void GB_Flag_free ( ) ; // free the Flag array if (pleft == pright) \ { \ int64_t i2 = X [pleft] ; \ - is_zombie = IS_FLIPPED (i2) ; \ + is_zombie = IS_ZOMBIE (i2) ; \ if (is_zombie) \ { \ i2 = FLIP (i2) ; \ @@ -2215,74 +2226,4 @@ void GB_Flag_free ( ) ; // free the Flag array // MAX for floating-point, same as max(x,y,'includenan') in MATLAB #define FMAX(x,y) ((isnan (x) || isnan (y)) ? NAN : IMAX (x,y)) -//------------------------------------------------------------------------------ -// empty: for masked matrix multiply, C=A*B -//------------------------------------------------------------------------------ - -// empty: return true if column j of matrix A is empty. 
If not empty, -// return the first and last row index in the column, and Ap [j] and Ap [j+1] - -static inline bool empty -( - const int64_t *restrict Ap, - const int64_t *restrict Ai, - int64_t j, - int64_t *pstart, - int64_t *pend, - int64_t *ilo, - int64_t *ihi -) -{ - (*pstart) = Ap [j] ; - (*pend) = Ap [j+1] ; - if ((*pstart) < (*pend)) - { - // column j has at least one entry; return false and find ilo and ihi - (*ilo) = Ai [(*pstart)] ; - (*ihi) = Ai [(*pend)-1] ; - return (false) ; - } - else - { - // column j is empty - return (true) ; - } -} - -//------------------------------------------------------------------------------ -// scatter_mask: for masked matrix multiply, C=A*B -//------------------------------------------------------------------------------ - -// scatter Mask(:,j) into Flag if it hasn't already been done - -static inline void scatter_mask -( - const int64_t pm1, // pm1 = Maskp [j] - const int64_t pm2, // pm2 = Maskp [j+1] - GB_cast_function cast_Mask_to_bool, // cast function for Maskx - const int64_t *restrict Maski, // row indices of Mask - const void *restrict Maskx, // values of Mask - const size_t msize, // size of Mask entries - int8_t *restrict Flag, // array of size Mask->nrows - bool *marked // true if Mask already scattered -) -{ - if (!(*marked)) - { - for (int64_t p = pm1 ; p < pm2 ; p++) - { - // Mij = (bool) Mask (i,j) - bool Mij ; - cast_Mask_to_bool (&Mij, Maskx +(p*msize), 0) ; - if (Mij) - { - // M(i,j) is true - Flag [Maski [p]] = 1 ; - } - } - (*marked) = true ; - } -} - #endif - diff --git a/GraphBLAS/Source/GB_AxB_builtin.c b/GraphBLAS/Source/GB_AxB_builtin.c index 905a93f19d..5374f0ce74 100644 --- a/GraphBLAS/Source/GB_AxB_builtin.c +++ b/GraphBLAS/Source/GB_AxB_builtin.c @@ -20,6 +20,8 @@ #ifndef GBCOMPACT +#include "GB_AxB_methods.h" + // A semiring is defined by a binary "multiply" operator, and an associative // "add" monoid. For a built-in semiring, the multiply op can be any one of // 256 built-in binary operators. @@ -205,7 +207,6 @@ bool GB_AxB_builtin // true if C=A*B is handled const GrB_Matrix Mask, // Mask matrix for C (not complemented) const GrB_Matrix A, // input matrix const GrB_Matrix B, // input matrix - void *work, // workspace of size A->nrows == C->nrows const GrB_Semiring semiring, // semiring that defines C=A*B const bool flipxy // if true, do z=fmult(b,a) vs fmult(a,b) ) @@ -215,14 +216,23 @@ bool GB_AxB_builtin // true if C=A*B is handled // check inputs //-------------------------------------------------------------------------- - ASSERT_OK (GB_check (C, "C input for builtin AxB", 0)) ; - ASSERT_OK_OR_NULL (GB_check (Mask, "Mask input for builtin AxB", 0)) ; + if (Mask == NULL) + { + // C contains the pattern of C=A*B + ASSERT_OK (GB_check (C, "C input for builtin AxB", 0)) ; + } + else + { + // Mask is present. 
C->p and C->i are allocated but not initialized + ASSERT (C != NULL && C->p != NULL && C->i != NULL) ; + ASSERT (!C->p_shallow && !C->i_shallow) ; + ASSERT_OK (GB_check (Mask, "Mask input for builtin AxB", 0)) ; + } ASSERT_OK (GB_check (A, "A for builtin AxB", 0)) ; ASSERT_OK (GB_check (B, "B for builtin AxB", 0)) ; ASSERT (!PENDING (C)) ; ASSERT (!ZOMBIES (C)) ; ASSERT (!PENDING (A)) ; ASSERT (!ZOMBIES (A)) ; ASSERT (!PENDING (B)) ; ASSERT (!ZOMBIES (B)) ; - ASSERT (work != NULL) ; ASSERT_OK (GB_check (semiring, "semiring for builtin", 0)) ; ASSERT (C->type == semiring->add->op->ztype) ; @@ -236,17 +246,6 @@ bool GB_AxB_builtin // true if C=A*B is handled return (false) ; } - //-------------------------------------------------------------------------- - // get the Flag workspace (already allocated and cleared) - //-------------------------------------------------------------------------- - - int8_t *restrict Flag = NULL ; - if (Mask != NULL) - { - Flag = GB_thread_local.Flag ; - ASSERT_FLAG_IS_CLEAR ; - } - //-------------------------------------------------------------------------- // define the worker for the switch factory //-------------------------------------------------------------------------- @@ -259,139 +258,12 @@ bool GB_AxB_builtin // true if C=A*B is handled // additional hard-coded workers, if the set of built-in operators is // extended. - // The void * work array has size C->nrows * sizeof (ztype). It is - // uninitialized on input, and its contents are not defined on output. - - const int64_t n = C->ncols ; - const int64_t *restrict Ap = A->p ; - const int64_t *restrict Ai = A->i ; - const int64_t *restrict Bp = B->p ; - const int64_t *restrict Bi = B->i ; - - const int64_t *restrict Maskp = NULL ; - const int64_t *restrict Maski = NULL ; - const void *restrict Maskx = NULL ; - GB_cast_function cast_Mask_to_bool = NULL ; - size_t msize = 0 ; - - if (Mask != NULL) - { - // get the mask - Maskp = Mask->p ; - Maski = Mask->i ; - Maskx = Mask->x ; - cast_Mask_to_bool = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; - msize = Mask->type->size ; - // Cp will soon be defined below - C->magic = MAGIC ; - } + #define GB_AXB(add,mult,xyname) GB_AxB_ ## add ## mult ## xyname - #define AxB(ztype,xytype,identity) \ - { \ - ztype *restrict w = work ; \ - ztype *restrict Cx = C->x ; \ - const xytype *restrict Ax = A->x ; \ - const xytype *restrict Bx = B->x ; \ - if (Mask != NULL) \ - { \ - int64_t cnz = 0 ; \ - int64_t *restrict Cp = C->p ; \ - int64_t *restrict Ci = C->i ; \ - for (int64_t j = 0 ; j < n ; j++) \ - { \ - /* log the start of C(:,j) */ \ - Cp [j] = cnz ; \ - /* get Mask(:,j) and skip if empty */ \ - int64_t pm1, pm2, mlo, mhi ; \ - if (empty (Maskp, Maski, j, &pm1, &pm2, &mlo, &mhi)) continue ;\ - bool marked = false ; \ - /* compute C(;,j) */ \ - for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) \ - { \ - /* B(k,j) is present */ \ - int64_t k = Bi [p] ; \ - /* get A(:,k) and skip if empty */ \ - int64_t pa1, pa2, alo, ahi ; \ - if (empty (Ap, Ai, k, &pa1, &pa2, &alo, &ahi)) continue ; \ - /* skip if all A(:,k) entries outside range of Mask(:,j)*/ \ - if (ahi < mlo || alo > mhi) continue ; \ - /* scatter Mask(:,j) into Flag if not yet done */ \ - scatter_mask (pm1, pm2, cast_Mask_to_bool, \ - Maski, Maskx, msize, Flag, &marked) ; \ - xytype bkj = Bx [p] ; \ - for (int64_t pa = pa1 ; pa < pa2 ; pa++) \ - { \ - /* w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) */ \ - int64_t i = Ai [pa] ; \ - int8_t flag = Flag [i] ; \ - if (flag == 0) continue ; \ - /* Mask(i,j) == 1 so do the 
work */ \ - xytype aik = Ax [pa] ; \ - ztype t = MULT (aik, bkj) ; \ - if (flag > 0) \ - { \ - /* first time C(i,j) seen */ \ - Flag [i] = -1 ; \ - w [i] = t ; \ - } \ - else \ - { \ - /* C(i,j) seen before, update it */ \ - ADD (w [i], t) ; \ - } \ - } \ - } \ - /* gather C(:,j), both values and pattern */ \ - if (marked) \ - { \ - for (int64_t p = pm1 ; p < pm2 ; p++) \ - { \ - int64_t i = Maski [p] ; \ - if (Flag [i] < 0) \ - { \ - Cx [cnz] = w [i] ; \ - Ci [cnz++] = i ; \ - } \ - Flag [i] = 0 ; \ - } \ - } \ - } \ - Cp [n] = cnz ; \ - } \ - else \ - { \ - const int64_t *restrict Cp = C->p ; \ - const int64_t *restrict Ci = C->i ; \ - for (int64_t j = 0 ; j < n ; j++) \ - { \ - /* clear w */ \ - for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) \ - { \ - w [Ci [p]] = identity ; \ - } \ - /* compute C(;,j) */ \ - for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) \ - { \ - /* B(k,j) is present */ \ - int64_t k = Bi [p] ; \ - xytype bkj = Bx [p] ; \ - for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) \ - { \ - /* w [i] += A(i,k) * B(k,j) */ \ - int64_t i = Ai [pa] ; \ - xytype aik = Ax [pa] ; \ - ztype t = MULT (aik, bkj) ; \ - ADD (w [i], t) ; \ - } \ - } \ - /* gather C(:,j) */ \ - for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) \ - { \ - Cx [p] = w [Ci [p]] ; \ - } \ - } \ - } \ - return (true) ; \ + #define AxB(add,mult,xyname) \ + { \ + GB_AXB (add,mult,xyname) (C, Mask, A, B, flipxy) ; \ + return (true) ; \ } //-------------------------------------------------------------------------- @@ -400,6 +272,7 @@ bool GB_AxB_builtin // true if C=A*B is handled #include "GB_AxB_factory.c" #undef AxB + #undef GB_AXB //-------------------------------------------------------------------------- // no built-in worker for this semiring diff --git a/GraphBLAS/Source/GB_AxB_methods.h b/GraphBLAS/Source/GB_AxB_methods.h new file mode 100644 index 0000000000..9f2daa20f3 --- /dev/null +++ b/GraphBLAS/Source/GB_AxB_methods.h @@ -0,0 +1,213 @@ +//------------------------------------------------------------------------------ +// GB_AxB_methods.h: definitions for GB_AxB_builtin.c and GB_Matrix_AdotB.c +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +//------------------------------------------------------------------------------ + +#include "GB.h" + +#ifndef GB_AXB_METHODS_H +#define GB_AXB_METHODS_H + +//------------------------------------------------------------------------------ +// empty: for masked matrix multiply, C=A*B +//------------------------------------------------------------------------------ + +// empty: return true if column j of matrix A is empty. 
If not empty, +// return the first and last row index in the column, and Ap [j] and Ap [j+1] + +static inline bool empty +( + const int64_t *restrict Ap, + const int64_t *restrict Ai, + int64_t j, + int64_t *ilo, + int64_t *ihi +) +{ + int64_t pstart = Ap [j] ; + int64_t pend = Ap [j+1] ; + if (pstart < pend) + { + // column j has at least one entry; return false and find ilo and ihi + (*ilo) = Ai [pstart] ; + (*ihi) = Ai [pend-1] ; + return (false) ; + } + else + { + // column j is empty + return (true) ; + } +} + +//------------------------------------------------------------------------------ +// scatter_mask: for masked matrix multiply, C=A*B +//------------------------------------------------------------------------------ + +// scatter Mask(:,j) into Flag if it hasn't already been done + +static inline void scatter_mask +( + const int64_t j, // column to scatter + const int64_t *restrict Maskp, // column pointers of Mask + const int64_t *restrict Maski, // row indices of Mask + const void *restrict Maskx, // values of Mask + const size_t msize, // size of Mask entries + GB_cast_function cast_Mask_to_bool, // cast function for Maskx + int8_t *restrict Flag, // array of size Mask->nrows + bool *marked // true if Mask already scattered +) +{ + if (!(*marked)) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + // Mij = (bool) Mask (i,j) + bool Mij ; + cast_Mask_to_bool (&Mij, Maskx +(p*msize), 0) ; + if (Mij) + { + // M(i,j) is true + Flag [Maski [p]] = 1 ; + } + } + (*marked) = true ; + } +} + +//------------------------------------------------------------------------------ +// jinit: initializations for computing C(:,j) in AdotB +//------------------------------------------------------------------------------ + +static inline bool jinit // true if there any work to do for C(:,j) +( + // inputs, not modified: + int64_t *restrict Cp, // column pointers of C + const int64_t j, // column j to compute + const int64_t cnz, // number of entries in C, so far + const int64_t *restrict Bp, // column pointers of B + const int64_t *restrict Bi, // row indices of B + const int64_t *restrict Maskp, // column pointers of Mask + const int64_t m, // number of rows of C and A + + // outputs, not defined on input: + int64_t *restrict pb_start, // start of B(:,j) + int64_t *restrict pb_end, // end of B(:,j) + int64_t *restrict bjnz, // number of entries in B(:,j) + int64_t *restrict ib_first, // first row index in B(:,j) + int64_t *restrict ib_last, // last row index in B(:,j) + int64_t *restrict kk1, // first iteration counter for C(:,j) + int64_t *restrict kk2 // last iteration counter for C(:,j) +) +{ + + // log the start of column j of C + Cp [j] = cnz ; + + // get the start and end of column B(:,j) + (*pb_start) = Bp [j] ; + (*pb_end) = Bp [j+1] ; + (*bjnz) = (*pb_end) - (*pb_start) ; + + if ((*bjnz) == 0) + { + // B(:,j) has no entries, no work to do + return (false) ; + } + + // row indices of first and last entry in B(:,j) + (*ib_first) = Bi [(*pb_start)] ; + (*ib_last) = Bi [(*pb_end)-1] ; + + // iterate for each possible entry in C(:,j) + if (Maskp == NULL) + { + // compute all of C(:,j) + (*kk1) = 0 ; + (*kk2) = m ; + } + else + { + // C(i,j) can appear only if Mask(i,j)=1, so iterate over Mask(:,j) + (*kk1) = Maskp [j] ; + (*kk2) = Maskp [j+1] ; + } + + // B(:,j) has entries; there is work to do + return (true) ; +} + +//------------------------------------------------------------------------------ +// cij_init: initializations for computing C(i,j), for AdotB 
+//------------------------------------------------------------------------------ + +static inline bool cij_init // true if work to do, false if zombie +( + // inputs, not modified: + const int64_t kk, // iteration counter + const int64_t *restrict Maski, // Mask row indices + const void *restrict Maskx, // Mask values + const GB_cast_function cast_Mask, // typecasting function for Mask to bool + const size_t msize, // size of Mask entries + + const int64_t *restrict Ap, // column pointers of A + const int64_t *restrict Ai, // row indices of A + const int64_t ib_first, // first row index in B(:,j) + const int64_t ib_last, // last row index in B(:,j) + const int64_t pb_start, // start of B(:,j) + + // outputs, not defined on input: + int64_t *restrict i, // row index i for computing C(i,j) + int64_t *restrict pa, // start of A(:,i) + int64_t *restrict pa_end, // end of A(:,i) + int64_t *restrict pb, // start of B(:,j) + int64_t *restrict ainz // number of entries in A(:,i) +) +{ + + // get the row index of C(i,j) and the value of Mask(i,j) + if (Maski == NULL) + { + (*i) = kk ; + } + else + { + bool Mij ; + (*i) = Maski [kk] ; + cast_Mask (&Mij, Maskx + (kk*msize), 0) ; + if (!Mij) + { + // Mask(i,j) = 0, so no need to compute C(i,j) + return (false) ; + } + } + + // get the start and end of column A(:,i) + (*pa) = Ap [(*i)] ; + (*pa_end) = Ap [(*i)+1] ; + (*ainz) = (*pa_end) - (*pa) ; + + // quick checks that imply C(i,j) is symbolically zero + if ((*ainz) == 0 || Ai [(*pa_end)-1] < ib_first || ib_last < Ai [(*pa)]) + { + // no work to do + return (false) ; + } + + // get the start of column B(:,j) + (*pb) = pb_start ; + + return (true) ; +} + +//------------------------------------------------------------------------------ +// built-in semirings +//------------------------------------------------------------------------------ + +#include "GB_AxB__semirings.h" + +#endif diff --git a/GraphBLAS/Source/GB_AxB_numeric.c b/GraphBLAS/Source/GB_AxB_numeric.c index eac9301eff..a35f80e26c 100644 --- a/GraphBLAS/Source/GB_AxB_numeric.c +++ b/GraphBLAS/Source/GB_AxB_numeric.c @@ -20,18 +20,16 @@ // This option is used when C=A'*B' is computed via C=(B*A)' by GrB_mxm, and // for all uses of GrB_vxm. -// If the Mask is not NULL, then the pattern of C has not been computed. -// C->p and C->i are only allocated. This function computes the pattern of -// C as a subset of the Mask. +// If the Mask is not NULL, then the pattern of C has not been computed. C->p +// and C->i are only allocated. This function constucts the pattern of C as +// the same as the Mask, but with zombies if an entry appears in the Mask but +// not in A*B. // FUTURE: this can be done in parallel. The computation of each column C(:,j) // is an independent task. Each thread would need its own Flag and Work array. -// When the Mask is present, rather than computing a subset of the Mask, the -// pattern of the Mask can be used as-is, except that any entry not in C would -// be flagged as a zombie. Then a parallel prefix sum method could be used to -// delete the zombies, if necessary. 
#include "GB.h" +#include "GB_AxB_methods.h" GrB_Info GB_AxB_numeric // compute the values of C = A*B ( @@ -117,7 +115,8 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B // free C->x unless it is shallow, then reallocate it at the right size if (!C->x_shallow) { - GB_FREE_MEMORY (C->x) ; + // C->x is normally NULL already so this should do nothing + GB_FREE_MEMORY (C->x, C->nzmax, zsize) ; } GB_MALLOC_MEMORY (C->x, C->nzmax, zsize) ; C->x_shallow = false ; @@ -192,7 +191,7 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B // With a low flop count, the full symbolic analysis becomes very cheap. // As a result, if the flop count is low, Mask must be NULL both here and - // in the symbolic analsys. + // in the symbolic analysis. bool a_cast = atype_required->code != A->type->code ; bool b_cast = btype_required->code != B->type->code ; @@ -351,7 +350,7 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B // created, and this function is not used. All C=A*B computations are done // with the generic worker below. - if (GB_AxB_builtin (C, Mask, A2, B2, w, semiring, flipxy)) + if (GB_AxB_builtin (C, Mask, A2, B2, semiring, flipxy)) { // C = A2*B2 has been done via a hard-coded case; free memory and return GB_MATRIX_FREE (&A2) ; @@ -394,9 +393,12 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B // C = A*B, using Mask as a superset of the symbolic pattern of C //---------------------------------------------------------------------- - // get the function pointer for casting Mask(i,j) from its current - // type into boolean - GB_cast_function cast_Mask_to_bool = + // The pattern of C and Mask are the same, except that the Mask has no + // zombies but C may have them. Entries in the Mask but not in A*B + // become zombies in C. 
+ + // get cast function for casting Mask(i,j) from current type to boolean + GB_cast_function cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; const int64_t *restrict Maski = Mask->i ; @@ -406,34 +408,25 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B char bkj [bsize] ; - int64_t cnz = 0 ; - int64_t *restrict Cp = C->p ; int64_t *restrict Ci = C->i ; + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + for (int64_t j = 0 ; j < n ; j++) { - // log the start of C(:j) ; - Cp [j] = cnz ; - // get Mask(:,j) and skip if empty - int64_t pm1, pm2, mlo, mhi ; - if (empty (Maskp, Maski, j, &pm1, &pm2, &mlo, &mhi)) continue ; + // scatter Mask(:,j) into Flag bool marked = false ; + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; // compute C(:,j) = A * B(:,j), both values and pattern - for (int64_t pb = Bp [j] ; pb < Bp [j+1] ; pb++) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) { // B(k,j) is present - int64_t k = Bi [pb] ; - /* get A(:,k) and skip if empty */ - int64_t pa1, pa2, alo, ahi ; - if (empty (Ap, Ai, k, &pa1, &pa2, &alo, &ahi)) continue ; - /* skip if all A(:,k) entries outside range of Mask(:,j)*/ - if (ahi < mlo || alo > mhi) continue ; - /* scatter Mask(:,j) into Flag if not yet done */ - scatter_mask (pm1, pm2, cast_Mask_to_bool, - Maski, Maskx, msize, Flag, &marked) ; - // bkj = B(k,j) - memcpy (bkj, Bx +(pb*bsize), bsize) ; - for (int64_t pa = pa1 ; pa < pa2 ; pa++) + int64_t k = Bi [p] ; + memcpy (bkj, Bx +(p*bsize), bsize) ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) { // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) int64_t i = Ai [pa] ; @@ -467,25 +460,33 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B } } } - // gather C(:,j), both values and pattern - if (marked) + // gather C(:,j), both values and pattern, from the Mask(:,j) + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) { - for (int64_t p = pm1 ; p < pm2 ; p++) + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) { - int64_t i = Maski [p] ; - if (Flag [i] < 0) - { - // Cx [cnz] = w [i] ; - memcpy (Cx +(cnz*zsize), w +(i*zsize), zsize) ; - Ci [cnz++] = i ; - } - Flag [i] = 0 ; + // C(i,j) is a live entry, gather its row and value + // Cx [p] = w [i] ; + memcpy (Cx +(p*zsize), w +(i*zsize), zsize) ; + Ci [p] = i ; } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + // Cx [p] left uninitialized, or this could be done: + // memcpy (Cx +(p*zsize), identity, zsize) ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; } } - Cp [n] = cnz ; - C->magic = MAGIC ; ASSERT_FLAG_IS_CLEAR ; + ASSERT (ZOMBIES_OK (C)) ; + GB_queue_insert (C) ; + ASSERT_OK (GB_check (C, "C = A*B, with built-in mask", 0)) ; } else @@ -513,7 +514,6 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B { // B(k,j) is present int64_t k = Bi [p] ; - // bkj = B(k,j) memcpy (bkj, Bx +(p*bsize), bsize) ; for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) { @@ -542,6 +542,7 @@ GrB_Info GB_AxB_numeric // compute the values of C = A*B memcpy (Cx +(p*zsize), w +((Ci [p])*zsize), zsize) ; } } + ASSERT (!ZOMBIES (C)) ; } //-------------------------------------------------------------------------- diff --git a/GraphBLAS/Source/GB_AxB_symbolic.c b/GraphBLAS/Source/GB_AxB_symbolic.c index cb5b5bfe72..893e6cc562 100644 --- a/GraphBLAS/Source/GB_AxB_symbolic.c +++ b/GraphBLAS/Source/GB_AxB_symbolic.c @@ -272,6 +272,7 @@ GrB_Info GB_AxB_symbolic // pattern of C = A*B, A'*B, A*B', or A'*B' cmax = cnz + anrows ; if 
(cmax > C->nzmax) { + int64_t cold = C->nzmax ; int64_t cnew = 4*(C->nzmax + anrows) ; GB_REALLOC_MEMORY (Ci, cnew, C->nzmax, sizeof (int64_t), &ok) ; Ci_memory = GBYTES (C->nzmax, sizeof (int64_t)) ; @@ -280,7 +281,7 @@ GrB_Info GB_AxB_symbolic // pattern of C = A*B, A'*B, A*B', or A'*B' // out of memory GB_MATRIX_FREE (&AT) ; GB_MATRIX_FREE (&BT) ; - GB_FREE_MEMORY (Ci) ; + GB_FREE_MEMORY (Ci, cold, sizeof (int64_t)) ; GB_Matrix_clear (C) ; GB_Mark_free ( ) ; GB_Work_free ( ) ; @@ -511,7 +512,7 @@ GrB_Info GB_AxB_symbolic // pattern of C = A*B, A'*B, A*B', or A'*B' if (C->i == NULL) { // out of memory - GB_FREE_MEMORY (Ci) ; + GB_FREE_MEMORY (Ci, cnew, sizeof (int64_t)) ; GB_Matrix_clear (C) ; GB_Mark_free ( ) ; GB_Work_free ( ) ; @@ -530,7 +531,7 @@ GrB_Info GB_AxB_symbolic // pattern of C = A*B, A'*B, A*B', or A'*B' // free workspace and the column-oriented form ASSERT_OK (GB_check (C, "C output for symbolic C=(A*B)'", 0)) ; - GB_FREE_MEMORY (Ci) ; // Ci no longer needed + GB_FREE_MEMORY (Ci, cnew, sizeof (int64_t)) ; // Ci no longer needed return (REPORT_SUCCESS) ; } diff --git a/GraphBLAS/Source/GB_Flag_alloc.c b/GraphBLAS/Source/GB_Flag_alloc.c index f8b05199b5..7062dc9824 100644 --- a/GraphBLAS/Source/GB_Flag_alloc.c +++ b/GraphBLAS/Source/GB_Flag_alloc.c @@ -15,10 +15,11 @@ bool GB_Flag_alloc // allocate Flag space ) { - if (Flag_required > GB_thread_local.Flag_size) + int64_t currsize = GB_thread_local.Flag_size ; + if (Flag_required > currsize) { // free the existing space - GB_FREE_MEMORY (GB_thread_local.Flag) ; + GB_FREE_MEMORY (GB_thread_local.Flag, currsize, sizeof (int8_t)) ; GB_thread_local.Flag_size = 0 ; // calloc the new space diff --git a/GraphBLAS/Source/GB_Flag_free.c b/GraphBLAS/Source/GB_Flag_free.c index 9a13190a45..7b58b1ba4f 100644 --- a/GraphBLAS/Source/GB_Flag_free.c +++ b/GraphBLAS/Source/GB_Flag_free.c @@ -11,7 +11,8 @@ void GB_Flag_free ( ) // free the Flag array { - GB_FREE_MEMORY (GB_thread_local.Flag) ; + int64_t currsize = GB_thread_local.Flag_size ; + GB_FREE_MEMORY (GB_thread_local.Flag, currsize, sizeof (int8_t)) ; GB_thread_local.Flag_size = 0 ; } diff --git a/GraphBLAS/Source/GB_Mark_alloc.c b/GraphBLAS/Source/GB_Mark_alloc.c index c4e30845eb..59d6d524cb 100644 --- a/GraphBLAS/Source/GB_Mark_alloc.c +++ b/GraphBLAS/Source/GB_Mark_alloc.c @@ -15,10 +15,11 @@ bool GB_Mark_alloc // allocate Mark space ) { - if (Mark_required > GB_thread_local.Mark_size) + int64_t currsize = GB_thread_local.Mark_size ; + if (Mark_required > currsize) { // free the existing space - GB_FREE_MEMORY (GB_thread_local.Mark) ; + GB_FREE_MEMORY (GB_thread_local.Mark, currsize, sizeof (int64_t)) ; GB_thread_local.Mark_size = 0 ; // calloc the new space diff --git a/GraphBLAS/Source/GB_Mark_free.c b/GraphBLAS/Source/GB_Mark_free.c index 8c1ab7f05c..27e790b19c 100644 --- a/GraphBLAS/Source/GB_Mark_free.c +++ b/GraphBLAS/Source/GB_Mark_free.c @@ -11,7 +11,8 @@ void GB_Mark_free ( ) // free the Mark array { - GB_FREE_MEMORY (GB_thread_local.Mark) ; + int64_t currsize = GB_thread_local.Mark_size ; + GB_FREE_MEMORY (GB_thread_local.Mark, currsize, sizeof (int64_t)) ; GB_thread_local.Mark_size = 0 ; GB_thread_local.Mark_flag = 1 ; } diff --git a/GraphBLAS/Source/GB_Matrix_AdotB.c b/GraphBLAS/Source/GB_Matrix_AdotB.c index 8b94105d15..adff517dd0 100644 --- a/GraphBLAS/Source/GB_Matrix_AdotB.c +++ b/GraphBLAS/Source/GB_Matrix_AdotB.c @@ -39,207 +39,7 @@ // method to delete zombies in parallel. 
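As background for the zombie bookkeeping used throughout this patch (the masked gather in GB_AxB_numeric above, GB_assign, and GB_Matrix_transplant below): a deleted entry keeps its slot but stores a flipped row index, so deletion is O(1) per entry and compaction is deferred to GB_wait. A minimal self-contained sketch of that encoding, assuming flip (i) = -i-2 as a stand-in for the library's FLIP/UNFLIP macros, whose exact definitions are not shown in this patch:

    /* Sketch of the zombie-index convention.  Assumption: flip (i) = -i-2,
       so flip is its own inverse and any negative index marks a zombie. */
    #include <stdio.h>
    #include <stdint.h>

    static inline int64_t flip (int64_t i) { return (-i - 2) ; }
    static inline int     is_zombie (int64_t i) { return (i < 0) ; }
    static inline int64_t unflip (int64_t i) { return (is_zombie (i) ? flip (i) : i) ; }

    int main (void)
    {
        /* row indices of one column, with the entry in row 7 marked as a zombie */
        int64_t Ci [4] = { 2, 5, flip (7), 11 } ;
        int64_t nzombies = 0 ;
        for (int k = 0 ; k < 4 ; k++)
        {
            if (is_zombie (Ci [k])) nzombies++ ;
            printf ("slot %d: row %lld %s\n", k,
                (long long) unflip (Ci [k]),
                is_zombie (Ci [k]) ? "(zombie)" : "") ;
        }
        printf ("nzombies = %lld\n", (long long) nzombies) ;
        return (0) ;
    }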
#include "GB.h" - -//------------------------------------------------------------------------------ -// jinit: initializations for computing C(:,j) -//------------------------------------------------------------------------------ - -static bool jinit // true if there any work to do for C(:,j) -( - // inputs, not modified: - int64_t *Cp, // column pointers of C - const int64_t j, // column j to compute - const int64_t cnz, // number of entries in C, so far - const int64_t *Bp, // column pointers of B - const int64_t *Bi, // row indices of B - const int64_t *Maskp, // column pointers of Mask - const int64_t m, // number of rows of C and A - - // outputs, not defined on input: - int64_t *pb_start, // start of B(:,j) - int64_t *pb_end, // end of B(:,j) - int64_t *bjnz, // number of entries in B(:,j) - int64_t *ib_first, // first row index in B(:,j) - int64_t *ib_last, // last row index in B(:,j) - int64_t *kk1, // first iteration counter for C(:,j) - int64_t *kk2 // last iteration counter for C(:,j) -) -{ - - // log the start of column j of C - Cp [j] = cnz ; - - // get the start and end of column B(:,j) - (*pb_start) = Bp [j] ; - (*pb_end) = Bp [j+1] ; - (*bjnz) = (*pb_end) - (*pb_start) ; - - if ((*bjnz) == 0) - { - // B(:,j) has no entries, no work to do - return (false) ; - } - - // row indices of first and last entry in B(:,j) - (*ib_first) = Bi [(*pb_start)] ; - (*ib_last) = Bi [(*pb_end)-1] ; - - // iterate for each possible entry in C(:,j) - if (Maskp == NULL) - { - // compute all of C(:,j) - (*kk1) = 0 ; - (*kk2) = m ; - } - else - { - // C(i,j) can appear only if Mask(i,j)=1, so iterate over Mask(:,j) - (*kk1) = Maskp [j] ; - (*kk2) = Maskp [j+1] ; - } - - // B(:,j) has entries; there is work to do - return (true) ; -} - -//------------------------------------------------------------------------------ -// imask: return the next row index i for computing the entry C(i,j) -//------------------------------------------------------------------------------ - -static int64_t imask // row index i, or -1 if this entry is skipped -( - // inputs, not modified: - const int64_t kk, // iteration counter - const int64_t *Maski, // Mask row indices - const void *Maskx, // Mask values - const GB_cast_function cast_Mask, // typecasting function for Mask to bool - const size_t msize // size of Mask entries -) -{ - int64_t i ; - if (Maski == NULL) - { - i = kk ; - } - else - { - bool Mij ; - i = Maski [kk] ; - cast_Mask (&Mij, Maskx + (kk*msize), 0) ; - if (!Mij) - { - // Mask(i,j) = 0, so no need to compute C(i,j) - return (-1) ; - } - } - return (i) ; -} - -//------------------------------------------------------------------------------ -// cij_init: initializations for computing C(i,j) -//------------------------------------------------------------------------------ - -static bool cij_init // true if work to do, false otherwise -( - // inputs, not modified: - const int64_t i, // row index i for computing C(i,j) - const int64_t *Ap, // column pointers of A - const int64_t *Ai, // row indices of A - const int64_t ib_first, // first row index in B(:,j) - const int64_t ib_last, // last row index in B(:,j) - const int64_t pb_start, // start of B(:,j) - const int64_t bjnz, // number of entries in B(:,j) - - // outputs, not defined on input: - int64_t *pa, // start of A(:,i) - int64_t *pa_end, // end of A(:,i) - int64_t *pb // start of B(:,j) -) -{ - // get the start and end of column A(:,i) - (*pa) = Ap [i] ; - (*pa_end) = Ap [i+1] ; - int64_t ainz = (*pa_end) - (*pa) ; - - // quick checks that imply 
C(i,j) is symbolically zero - if (ainz == 0 || Ai [(*pa_end)-1] < ib_first || ib_last < Ai [(*pa)]) - { - // no work to do - return (false) ; - } - - // get the start of column B(:,j) - (*pb) = pb_start ; - - return (true) ; -} - -//------------------------------------------------------------------------------ -// kmerge: get the next row index k for C(i,j) += A(k,i)*B(k,j) -//------------------------------------------------------------------------------ - -static bool kmerge // true if row index k is found -( - // inputs, not modified: - const int64_t *Ai, // row indices of A - const int64_t *Bi, // row indices of B - const int64_t pa_end, // end of A(:,i) - const int64_t pb_end, // end of B(:,j) - - // input/output: - int64_t *pa, // A(k,i) is at location pa in Ai/Ax - int64_t *pb // B(k,j) is at location pb in Bi/Bx -) -{ - - int64_t ia = Ai [(*pa)] ; - int64_t ib = Bi [(*pb)] ; - if (ia < ib) - { - - //---------------------------------------------------------------------- - // A(ia,i) appears before B(ib,j) - //---------------------------------------------------------------------- - - // discard all entries A(ia:ib-1,i) - int64_t pleft = (*pa) + 1 ; - int64_t pright = pa_end ; - GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; - ASSERT (pleft > (*pa)) ; - (*pa) = pleft ; - return (false) ; - - } - else if (ib < ia) - { - - //---------------------------------------------------------------------- - // B(ib,j) appears before A(ia,i) - //---------------------------------------------------------------------- - - // discard all entries B(ib:ia-1,j) - int64_t pleft = (*pb) + 1 ; - int64_t pright = pb_end ; - GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; - ASSERT (pleft > (*pb)) ; - (*pb) = pleft ; - return (false) ; - - } - else // ia == ib - { - - //---------------------------------------------------------------------- - // A(k,i) and B(k,j) are the next entries to merge - //---------------------------------------------------------------------- - - return (true) ; - } -} - -//------------------------------------------------------------------------------ -// GB_Matrix_AdotB: C=A'*B via dot products -//------------------------------------------------------------------------------ +#include "GB_AxB_methods.h" GrB_Info GB_Matrix_AdotB // C = A'*B using dot product method ( @@ -322,40 +122,6 @@ GrB_Info GB_Matrix_AdotB // C = A'*B using dot product method } } - //-------------------------------------------------------------------------- - // get contents of C, A, B, and Mask - //-------------------------------------------------------------------------- - - const int64_t *Ai = A->i ; - const int64_t *Bi = B->i ; - const int64_t *Ap = A->p ; - const int64_t *Bp = B->p ; - int64_t *Ci = C->i ; - int64_t *Cp = C->p ; - int64_t n = B->ncols ; - int64_t m = A->ncols ; - ASSERT (C->ncols == n) ; - ASSERT (C->nrows == m) ; - - int64_t cnz = 0 ; - - const int64_t *Maskp = NULL ; - const int64_t *Maski = NULL ; - const void *Maskx = NULL ; - GB_cast_function cast_Mask = NULL ; - size_t msize = 0 ; - - if (Mask != NULL) - { - Maskp = Mask->p ; - Maski = Mask->i ; - Maskx = Mask->x ; - msize = Mask->type->size ; - // get the function pointer for casting Mask(i,j) from its current - // type into boolean - cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; - } - //-------------------------------------------------------------------------- // C = A'*B, computing each entry with a dot product, via builtin semiring //-------------------------------------------------------------------------- @@ -368,57 
+134,13 @@ GrB_Info GB_Matrix_AdotB // C = A'*B using dot product method // define the worker for the switch factory //-------------------------------------------------------------------------- - #define AxB(ztype,xytype,identity) \ - { \ - ztype *Cx = C->x ; \ - const xytype *Ax = A->x ; \ - const xytype *Bx = B->x ; \ - for (int64_t j = 0 ; j < n ; j++) \ - { \ - /* initializations for C(:,j) */ \ - int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; \ - if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, \ - &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; \ - for (int64_t kk = kk1 ; kk < kk2 ; kk++) \ - { \ - /* compute cij = A(:,i)' * B(:,j), using the semiring */ \ - ztype cij ; \ - int64_t i = imask (kk, Maski, Maskx, cast_Mask, msize) ; \ - if (i < 0) continue ; \ - bool cij_exists = false ; /* C(i,j) not yet in the pattern */\ - int64_t pa, pa_end, pb ; \ - if (!cij_init (i, Ap, Ai, ib_first, ib_last, pb_start, bjnz, \ - &pa, &pa_end, &pb)) continue ; \ - while (pa < pa_end && pb < pb_end) \ - { \ - if (kmerge (Ai, Bi, pa_end, pb_end, &pa, &pb)) \ - { \ - xytype aki = Ax [pa++] ; /* aki = A(k,i) */ \ - xytype bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ - ztype t = MULT (aki, bkj) ; \ - if (cij_exists) \ - { \ - /* cij += A(k,i) * B(k,j) */ \ - ADD (cij, t) ; \ - } \ - else \ - { \ - /* cij = A(k,i) * B(k,j) */ \ - cij_exists = true ; \ - cij = t ; \ - } \ - } \ - } \ - if (cij_exists) \ - { \ - /* C(i,j) = cij */ \ - Cx [cnz] = cij ; \ - Ci [cnz++] = i ; \ - } \ - } \ - } \ - done = true ; \ - } \ + #define GB_AdotB(add,mult,xyname) GB_AdotB_ ## add ## mult ## xyname + + #define AxB(add,mult,xyname) \ + { \ + GB_AdotB (add,mult,xyname) (C, Mask, A, B, flipxy) ; \ + done = true ; \ + } \ break ; //-------------------------------------------------------------------------- @@ -444,9 +166,40 @@ GrB_Info GB_Matrix_AdotB // C = A'*B using dot product method { //---------------------------------------------------------------------- - // get operators, functions, workspace, and contents of A, B, and C + // get operators, functions, workspace, contents of A, B, C, and Mask //---------------------------------------------------------------------- + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + // get the semiring operators GrB_BinaryOp multiply = semiring->multiply ; GrB_Monoid add = semiring->add ; @@ -474,6 +227,8 @@ GrB_Info GB_Matrix_AdotB // C = A'*B using dot product method void *Cx = C->x ; void *cij = Cx ; // advances through each entry of C + void *identity = add->identity ; + GB_cast_function cast_A, cast_B ; if (flipxy) { @@ -492,59 +247,232 @@ GrB_Info GB_Matrix_AdotB // C = A'*B using dot product method // C = A'*B via dot products, function pointers, and typecasting //---------------------------------------------------------------------- + #define GET(pa,pb) \ + { \ + /* 
aki = A(k,i), located in Ax [pa] */ \ + cast_A (aki, Ax +((pa)*asize), asize) ; \ + /* bkj = B(k,j), located in Bx [pb] */ \ + cast_B (bkj, Bx +((pb)*bsize), bsize) ; \ + } + + #define MULT \ + { \ + if (flipxy) \ + { \ + /* zwork = bkj * aki */ \ + fmult (zwork, bkj, aki) ; \ + } \ + else \ + { \ + /* zwork = aki * bkj */ \ + fmult (zwork, aki, bkj) ; \ + } \ + } + + #define ADD \ + { \ + /* cij += A(k,i) * B(k,j), and add to the pattern */ \ + /* cwork = cij */ \ + memcpy (cwork, cij, csize) ; \ + /* cij = cwork + zwork */ \ + fadd (cij, cwork, zwork) ; \ + } + + #define MERGE \ + { \ + GET (pa, pb) ; \ + MULT ; \ + if (cij_exists) \ + { \ + ADD ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j), and add to the pattern */ \ + cij_exists = true ; \ + /* cij = cwork */ \ + memcpy (cij, zwork, csize) ; \ + } \ + pa++ ; \ + pb++ ; \ + } + for (int64_t j = 0 ; j < n ; j++) { - // initializations for C(:,j) + + //------------------------------------------------------------------ + // C(:,j) = A'*B(:,j) + //------------------------------------------------------------------ + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + for (int64_t kk = kk1 ; kk < kk2 ; kk++) { + + //-------------------------------------------------------------- // compute cij = A(:,i)' * B(:,j), using the semiring - int64_t i = imask (kk, Maski, Maskx, cast_Mask, msize) ; - if (i < 0) continue ; + //-------------------------------------------------------------- + bool cij_exists = false ; // C(i,j) not yet in the pattern - int64_t pa, pa_end, pb ; - if (!cij_init (i, Ap, Ai, ib_first, ib_last, pb_start, bjnz, - &pa, &pa_end, &pb)) continue ; - while (pa < pa_end && pb < pb_end) + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //---------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //---------------------------------------------------------- + + cij_exists = true ; + memcpy (cij, identity, csize) ; + for (int64_t k = 0 ; k < nrows ; k++) + { + GET (pa+k, pb+k) ; + MULT ; + ADD ; + } + + } + else if (ainz == nrows) + { + + //---------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //---------------------------------------------------------- + + cij_exists = true ; + memcpy (cij, identity, csize) ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + GET (pa+k, pb) ; + MULT ; + ADD ; + } + + } + else if (bjnz == nrows) { - if (kmerge (Ai, Bi, pa_end, pb_end, &pa, &pb)) + + //---------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //---------------------------------------------------------- + + cij_exists = true ; + memcpy (cij, identity, csize) ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + GET (pa, pb+k) ; + MULT ; + ADD ; + } + + } + else if (ainz > 32 * bjnz) + { + + //---------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //---------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + 
int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //---------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //---------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) { - // aki = A(k,i), located in Ax [pa] - cast_A (aki, Ax +(pa*asize), asize) ; - // bkj = B(k,j), located in Bx [pb] - cast_B (bkj, Bx +(pb*bsize), bsize) ; - if (flipxy) + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k { - // zwork = bkj * aki - fmult (zwork, bkj, aki) ; + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; } - else + } + + } + else + { + + //---------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //---------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) { - // zwork = aki * bkj - fmult (zwork, aki, bkj) ; + // A(ia,i) appears before B(ib,j) + pa++ ; } - if (cij_exists) + else if (ib < ia) { - // cij += A(k,i) * B(k,j), and add to the pattern - // cwork = cij - memcpy (cwork, cij, csize) ; - // cij = cwork + zwork - fadd (cij, cwork, zwork) ; + // B(ib,j) appears before A(ia,i) + pb++ ; } - else + else // ia == ib { - // cij = A(k,i) * B(k,j), and add to the pattern - // note that semiring->add->identity is not required - cij_exists = true ; - // cij = cwork - memcpy (cij, zwork, csize) ; + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; } - pa++ ; - pb++ ; } } + if (cij_exists) { // C(i,j) = cij @@ -553,26 +481,26 @@ GrB_Info GB_Matrix_AdotB // C = A'*B using dot product method } } } + // log the end of the last column + Cp [n] = cnz ; } - //-------------------------------------------------------------------------- - // wrapup - //-------------------------------------------------------------------------- - - // log the end of the last column - Cp [n] = cnz ; - C->magic = MAGIC ; // C is now initialized ] - //-------------------------------------------------------------------------- // trim the size of C: this cannot fail //-------------------------------------------------------------------------- + C->magic = MAGIC ; // C is now initialized ] ASSERT (cnz <= C->nzmax) ; - bool ok = GB_Matrix_realloc (C, cnz, true, NULL) ; + bool ok = GB_Matrix_realloc (C, NNZ (C), true, NULL) ; ASSERT (ok) ; ASSERT_OK (GB_check (C, "C = A'*B output", 0)) ; return (REPORT_SUCCESS) ; } #undef AxB +#undef GB_AdotB +#undef MERGE +#undef GET +#undef MULT +#undef ADD diff --git a/GraphBLAS/Source/GB_Matrix_alloc.c b/GraphBLAS/Source/GB_Matrix_alloc.c index 0bcfe0a45d..cc1851425a 100644 --- a/GraphBLAS/Source/GB_Matrix_alloc.c +++ b/GraphBLAS/Source/GB_Matrix_alloc.c @@ -65,8 +65,8 @@ bool GB_Matrix_alloc // allocate space in a matrix if (A->i == NULL || (numeric && A->x == NULL)) { // out of memory - GB_FREE_MEMORY 
(A->x) ; - GB_FREE_MEMORY (A->i) ; + GB_FREE_MEMORY (A->x, A->nzmax, A->type->size) ; + GB_FREE_MEMORY (A->i, A->nzmax, sizeof (int64_t)) ; return (false) ; } diff --git a/GraphBLAS/Source/GB_Matrix_dup.c b/GraphBLAS/Source/GB_Matrix_dup.c index c8bf85e2a4..826d02437d 100644 --- a/GraphBLAS/Source/GB_Matrix_dup.c +++ b/GraphBLAS/Source/GB_Matrix_dup.c @@ -10,6 +10,18 @@ // C = A, making a deep copy. Not user-callable; this function does the work // for user-callable functions GrB_*_dup. +// (*handle) and A might be identical, with GrB_Matrix_dup (&A, A), so the +// final output is written into the handle only at the very last step. The +// input matrix A will be lost, and will result in a memory leak, unless the +// user application does: + +// B = A ; +// GrB_Matrix_dup (&A, A) ; +// GrB_free (&A) ; +// GrB_free (&B) ; + +// A is the new copy and B is the old copy. Each should be freed when done. + #include "GB.h" GrB_Info GB_Matrix_dup // make an exact copy of a matrix @@ -45,13 +57,14 @@ GrB_Info GB_Matrix_dup // make an exact copy of a matrix // [ [ create C; malloc C->p and do not initialize it double memory = GBYTES (A->ncols + 1, sizeof (int64_t)) ; GrB_Info info ; - GB_NEW (handle, A->type, A->nrows, A->ncols, false, true) ; + GrB_Matrix C ; + GB_NEW (&C, A->type, A->nrows, A->ncols, false, true) ; + if (info != GrB_SUCCESS) { (*handle) = NULL ; return (info) ; } - GrB_Matrix C = *handle ; // quick return if A is empty if (A->nzmax == 0) @@ -59,6 +72,7 @@ GrB_Info GB_Matrix_dup // make an exact copy of a matrix // both the input matrix A and the output matrix C are empty GB_Matrix_clear (C) ; // C is now intialized ] + (*handle) = C ; return (REPORT_SUCCESS) ; } @@ -67,7 +81,7 @@ GrB_Info GB_Matrix_dup // make an exact copy of a matrix if (!GB_Matrix_alloc (C, nnz, true, &memory)) { // out of memory - GB_MATRIX_FREE (handle) ; + GB_MATRIX_FREE (&C) ; (*handle) = NULL ; return (ERROR (GrB_OUT_OF_MEMORY, (LOG, "out of memory, %g GBytes required", memory))) ; @@ -80,6 +94,7 @@ GrB_Info GB_Matrix_dup // make an exact copy of a matrix memcpy (C->x, A->x, nnz * A->type->size) ; ASSERT_OK (GB_check (C, "C duplicate of A", 0)) ; + (*handle) = C ; return (REPORT_SUCCESS) ; } diff --git a/GraphBLAS/Source/GB_Matrix_free.c b/GraphBLAS/Source/GB_Matrix_free.c index 88ac483dd0..72d590821a 100644 --- a/GraphBLAS/Source/GB_Matrix_free.c +++ b/GraphBLAS/Source/GB_Matrix_free.c @@ -26,11 +26,11 @@ void GB_Matrix_free // free a matrix A->magic = FREED ; // to help detect dangling pointers if (!A->p_shallow) { - GB_FREE_MEMORY (A->p) ; + GB_FREE_MEMORY (A->p, A->ncols+1, sizeof (int64_t)) ; } A->p = NULL ; GB_Matrix_ixfree (A) ; - GB_FREE_MEMORY (*matrix) ; + GB_FREE_MEMORY (*matrix, 1, sizeof (GB_Matrix_opaque)) ; } (*matrix) = NULL ; } diff --git a/GraphBLAS/Source/GB_Matrix_ixfree.c b/GraphBLAS/Source/GB_Matrix_ixfree.c index eb4ddac3f6..c211bfd566 100644 --- a/GraphBLAS/Source/GB_Matrix_ixfree.c +++ b/GraphBLAS/Source/GB_Matrix_ixfree.c @@ -35,7 +35,7 @@ void GB_Matrix_ixfree // free all but A->p // free A->i unless it is shallow if (!A->i_shallow) { - GB_FREE_MEMORY (A->i) ; + GB_FREE_MEMORY (A->i, A->nzmax, sizeof (int64_t)) ; } A->i = NULL ; A->i_shallow = false ; @@ -43,7 +43,7 @@ void GB_Matrix_ixfree // free all but A->p // free A->x unless it is shallow if (!A->x_shallow) { - GB_FREE_MEMORY (A->x) ; + GB_FREE_MEMORY (A->x, A->nzmax, A->type->size) ; } A->x = NULL ; A->x_shallow = false ; diff --git a/GraphBLAS/Source/GB_Matrix_multiply.c b/GraphBLAS/Source/GB_Matrix_multiply.c index
fad099be16..40c2d5c8e3 100644 --- a/GraphBLAS/Source/GB_Matrix_multiply.c +++ b/GraphBLAS/Source/GB_Matrix_multiply.c @@ -44,7 +44,7 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' // check inputs //-------------------------------------------------------------------------- - // [ [ [ [ [ C need not be initialized, just the column pointers present + // C need not be initialized, just the column pointers present ASSERT (C != NULL && C->p != NULL && !C->p_shallow) ; ASSERT_OK_OR_NULL (GB_check (Mask, "Mask for generic A*B", 0)) ; ASSERT_OK (GB_check (A, "A for generic A*B", 0)) ; @@ -120,6 +120,7 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' OK (GB_AxB_symbolic (C, M, A, B, false, false, false)) ; OK (GB_AxB_numeric (C, M, A, B, semiring, flipxy, flo)) ; did_mask = (M != NULL) ; + if (did_mask) ASSERT (ZOMBIES_OK (C)) ; } else @@ -129,11 +130,21 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' // C = A'*B //------------------------------------------------------------------ - // select the method that uses the least workspace - bool C_is_small = C->nrows <= 4 && C->ncols <= 4 && - (C->nrows * C->ncols) <= 4 ; + bool use_adotb ; + if (Mask != NULL) + { + // C = A'*B always uses the dot product method + use_adotb = true ; + } + else + { + // C = A'*B uses the dot product method only if C is small + GrB_Index cwork ; + bool ok = GB_Index_multiply (&cwork, C->nrows, C->ncols) ; + use_adotb = ok && cwork < IMIN (at_workspace, 4 * bt_workspace); + } - if (Mask != NULL || C_is_small) + if (use_adotb) { //-------------------------------------------------------------- @@ -144,14 +155,14 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' // are computed, which makes this method very efficient when // the mask is very sparse (triangle counting, for example). // Each entry C(i,j) for which Mask(i,j)=1 is computed via a - // dot product, C(i,j)=sum(A(:,i)*B(:,j)). If the mask is not + // dot product, C(i,j)=A(:,i)'*B(:,j). If the mask is not // present, the dot-product method is very slow in general, and // thus the outer-product method (GB_AxB_symbolic and _numeric, // in the two cases below) is used instead, with A or B being // explicitly transposed. 
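For reference, the per-entry dot product C(i,j)=A(:,i)'*B(:,j) amounts to merging two sorted row-index lists. A minimal sketch for the PLUS_TIMES case over double, with illustrative array names; the generated workers above additionally special-case dense columns and switch to a trim binary search when one column has more than 32 times the entries of the other, which this sketch omits:

    /* Sketch: dot product of two sparse columns stored as sorted
       index/value pairs.  Names and signature are illustrative only. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    static bool sparse_dot          /* returns true if C(i,j) exists */
    (
        double *cij,                /* output: the dot product, if any */
        const int64_t *Ai, const double *Ax, int64_t pa, int64_t pa_end,
        const int64_t *Bi, const double *Bx, int64_t pb, int64_t pb_end
    )
    {
        bool exists = false ;
        double c = 0 ;
        while (pa < pa_end && pb < pb_end)
        {
            int64_t ia = Ai [pa], ib = Bi [pb] ;
            if      (ia < ib) pa++ ;            /* A(ia,i) has no match */
            else if (ib < ia) pb++ ;            /* B(ib,j) has no match */
            else                                /* ia == ib: multiply-add */
            {
                c += Ax [pa++] * Bx [pb++] ;
                exists = true ;
            }
        }
        if (exists) *cij = c ;
        return (exists) ;
    }

    int main (void)
    {
        int64_t Ai [ ] = { 0, 2, 5 } ;  double Ax [ ] = { 1, 2, 3 } ;
        int64_t Bi [ ] = { 2, 3, 5 } ;  double Bx [ ] = { 4, 5, 6 } ;
        double cij ;
        if (sparse_dot (&cij, Ai, Ax, 0, 3, Bi, Bx, 0, 3))
        {
            printf ("C(i,j) = %g\n", cij) ;     /* 2*4 + 3*6 = 26 */
        }
        return (0) ;
    }

Note that the entry C(i,j) is added to the pattern only if at least one matching pair is found, which is why the mask-driven dot-product method never produces explicit entries outside the mask.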
OK (GB_Matrix_AdotB (C, Mask, A, B, semiring, flipxy)) ; - did_mask = true ; + did_mask = (Mask != NULL) ; } else if (at_workspace < 4 * bt_workspace) @@ -170,6 +181,7 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' bool flo = GB_AxB_flopcount (AT, B, flimit, &f) ; OK (GB_AxB_symbolic (C, NULL, AT, B, false, false, false)) ; OK (GB_AxB_numeric (C, NULL, AT, B, semiring, flipxy, flo)) ; + ASSERT (!ZOMBIES (C)) ; } else @@ -191,6 +203,7 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' bool flo = GB_AxB_flopcount (BT, A, flimit, &f) ; OK (GB_AxB_symbolic (CT, NULL, BT, A, false, false, false)) ; OK (GB_AxB_numeric (CT, NULL, BT, A, semiring, !flipxy, flo)) ; + ASSERT (!ZOMBIES (CT)) ; GrB_free (&BT) ; // C = CT', no typecasting, no operator @@ -227,6 +240,7 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' OK (GB_AxB_symbolic (C, M, A, BT, false, false, false)) ; OK (GB_AxB_numeric (C, M, A, BT, semiring, flipxy, flo)) ; did_mask = (M != NULL) ; + if (did_mask) ASSERT (ZOMBIES_OK (C)) ; } else @@ -259,10 +273,12 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' OK (GB_AxB_symbolic (CT, M, B, AT, false, false, false)) ; OK (GB_AxB_numeric (CT, M, B, AT, semiring, !flipxy, flo)) ; did_mask = (M != NULL) ; + if (did_mask) ASSERT (ZOMBIES_OK (CT)) ; GrB_free (&MT) ; GrB_free (&AT) ; // C = CT', no typecasting, no operator + APPLY_PENDING_UPDATES (CT) ; OK (GB_Matrix_transpose (C, CT, NULL, true)) ; } @@ -291,6 +307,7 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' // transpose A or B in the symbolic analysis. OK (GB_AxB_symbolic (CT, NULL, B, A, false, false, false)) ; OK (GB_AxB_numeric (CT, NULL, B, A, semiring, !flipxy, true)) ; + ASSERT (!ZOMBIES (CT)) ; } else @@ -318,12 +335,13 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' // GB_AxB_symbolic (CT, MT, B, A, false, false, false)) ; OK (GB_AxB_numeric (CT, MT, B, A, semiring, !flipxy, false)) ; did_mask = (MT != NULL) ; + if (did_mask) ASSERT (ZOMBIES_OK (CT)) ; GrB_free (&MT) ; + APPLY_PENDING_UPDATES (CT) ; } // C = CT', no typecasting, no operator OK (GB_Matrix_transpose (C, CT, NULL, true)) ; - } } @@ -334,6 +352,7 @@ GrB_Info GB_Matrix_multiply // C = A*B, A'*B, A*B', or A'*B' FREE_ALL ; ASSERT_OK (GB_check (C, "C output for generic C=A*B", 0)) ; (*mask_applied) = did_mask ; + if (did_mask) { ASSERT (ZOMBIES_OK (C)) ; } else { ASSERT (!ZOMBIES (C)) ; } return (REPORT_SUCCESS) ; } diff --git a/GraphBLAS/Source/GB_Matrix_realloc.c b/GraphBLAS/Source/GB_Matrix_realloc.c index 07d742db7f..133e76d62e 100644 --- a/GraphBLAS/Source/GB_Matrix_realloc.c +++ b/GraphBLAS/Source/GB_Matrix_realloc.c @@ -64,7 +64,7 @@ bool GB_Matrix_realloc // reallocate space in a matrix } else { - GB_FREE_MEMORY (A->x) ; + GB_FREE_MEMORY (A->x, A->nzmax, A->type->size) ; } bool ok = ok1 && ok2 ; diff --git a/GraphBLAS/Source/GB_Matrix_transplant.c b/GraphBLAS/Source/GB_Matrix_transplant.c index 1ae6c3c2e7..ec3d4795ce 100644 --- a/GraphBLAS/Source/GB_Matrix_transplant.c +++ b/GraphBLAS/Source/GB_Matrix_transplant.c @@ -38,7 +38,10 @@ GrB_Info GB_Matrix_transplant // transplant one matrix into another ASSERT (C != NULL && !C->p_shallow) ; ASSERT_OK (GB_check (A, "A before transplant", 0)) ; ASSERT_OK (GB_check (ctype, "new type for C", 0)) ; - ASSERT (!PENDING (A)) ; ASSERT (!ZOMBIES (A)) ; + ASSERT (!PENDING (A)) ; + + // zombies in A can be safely transplanted into C + ASSERT (ZOMBIES_OK (A)) ; // C must be the same dimensions as A, and the types must be compatible ASSERT 
(C->nrows == A->nrows && C->ncols == A->ncols) ; @@ -76,7 +79,7 @@ GrB_Info GB_Matrix_transplant // transplant one matrix into another { // A->p is not shallow, so free the existing C->p if it exists and // replace with A->p - GB_FREE_MEMORY (C->p) ; + GB_FREE_MEMORY (C->p, C->ncols+1, sizeof (int64_t)) ; C->p = A->p ; A->p = NULL ; } @@ -165,7 +168,7 @@ GrB_Info GB_Matrix_transplant // transplant one matrix into another GB_cast_array (C->x, C->type->code, A->x, A->type->code, anz) ; if (!A->x_shallow) { - GB_FREE_MEMORY (A->x) ; + GB_FREE_MEMORY (A->x, A->nzmax, A->type->size) ; } A->x = NULL ; } @@ -193,16 +196,20 @@ GrB_Info GB_Matrix_transplant // transplant one matrix into another A->i = NULL ; } - ASSERT (A->i == NULL) ; // has been freed or removed + ASSERT (A->i == NULL) ; // has been freed or removed A->i_shallow = false ; ASSERT (C->i != NULL) ; C->i_shallow = false ; + C->nzombies = A->nzombies ; // zombies have been transplanted into C + GB_queue_insert (C) ; + //-------------------------------------------------------------------------- // free A and return result //-------------------------------------------------------------------------- + // if A has zombies, it is removed from the queue by GB_Matrix_free GB_MATRIX_FREE (Ahandle) ; ASSERT_OK (GB_check (C, "C after transplant", 0)) ; return (REPORT_SUCCESS) ; diff --git a/GraphBLAS/Source/GB_Monoid_new.c b/GraphBLAS/Source/GB_Monoid_new.c index 7010f181cc..259032fb24 100644 --- a/GraphBLAS/Source/GB_Monoid_new.c +++ b/GraphBLAS/Source/GB_Monoid_new.c @@ -73,7 +73,7 @@ GrB_Info GB_Monoid_new // create a monoid GB_MALLOC_MEMORY (mon->identity, 1, op->ztype->size) ; if (mon->identity == NULL) { - GB_FREE_MEMORY (*monoid) ; + GB_FREE_MEMORY (*monoid, 1, sizeof (GB_Monoid_opaque)) ; return (ERROR (GrB_OUT_OF_MEMORY, (LOG, "out of memory"))) ; } diff --git a/GraphBLAS/Source/GB_Work_free.c b/GraphBLAS/Source/GB_Work_free.c index 9ee3bd8b02..3707fef724 100644 --- a/GraphBLAS/Source/GB_Work_free.c +++ b/GraphBLAS/Source/GB_Work_free.c @@ -11,7 +11,8 @@ void GB_Work_free ( ) // free the Work array { - GB_FREE_MEMORY (GB_thread_local.Work) ; + int64_t currsize = GB_thread_local.Work_size ; + GB_FREE_MEMORY (GB_thread_local.Work, currsize, sizeof (char)) ; GB_thread_local.Work_size = 0 ; } diff --git a/GraphBLAS/Source/GB_accum_mask.c b/GraphBLAS/Source/GB_accum_mask.c index 8fc2100f9d..964d619e73 100644 --- a/GraphBLAS/Source/GB_accum_mask.c +++ b/GraphBLAS/Source/GB_accum_mask.c @@ -167,6 +167,7 @@ GrB_Info GB_accum_mask // C = accum (C,T) // need to do a deep copy if T is shallow. T is normally not shallow, // but there are a few cases in which it can be a shallow copy of the // user's input matrix. T is freed by GB_Matrix_transplant. + // T may have zombies, which are transplanted into Z. 
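A transplant, as relaxed here, moves the arrays and the zombie count of one matrix into another rather than copying them, then frees the emptied shell. A minimal sketch of that idiom on a simplified struct; simple_matrix and transplant below are illustrative stand-ins, not the library's GB_Matrix_opaque or GB_Matrix_transplant:

    /* Sketch of a "transplant": steal the arrays of src into dst, carry the
       zombie count along with the pattern, and free the now-empty shell. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    typedef struct
    {
        int64_t *p ;          /* column pointers */
        int64_t *i ;          /* row indices (flipped for zombies) */
        double  *x ;          /* values */
        int64_t nzombies ;    /* deleted-but-not-yet-compacted entries */
    } simple_matrix ;

    void transplant (simple_matrix *dst, simple_matrix **src_handle)
    {
        simple_matrix *src = *src_handle ;
        free (dst->p) ; free (dst->i) ; free (dst->x) ;  /* drop old content */
        dst->p = src->p ; dst->i = src->i ; dst->x = src->x ;
        dst->nzombies = src->nzombies ;    /* zombies move with the pattern */
        src->p = NULL ; src->i = NULL ; src->x = NULL ;
        free (src) ;                       /* free the shell, not the arrays */
        (*src_handle) = NULL ;
    }

    int main (void)
    {
        simple_matrix *T = calloc (1, sizeof (simple_matrix)) ;
        if (T == NULL) return (1) ;
        T->i = malloc (4 * sizeof (int64_t)) ;
        T->nzombies = 1 ;
        simple_matrix Z = { NULL, NULL, NULL, 0 } ;
        transplant (&Z, &T) ;
        printf ("T freed: %s, Z.nzombies = %lld\n",
            (T == NULL) ? "yes" : "no", (long long) Z.nzombies) ;
        free (Z.i) ;
        return (0) ;
    }

Because the zombies travel with the pattern, the receiving matrix must be placed back in the pending-work queue, which is exactly what the added GB_queue_insert (C) call in GB_Matrix_transplant does.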
info = GB_Matrix_transplant (Z, C->type, Thandle) ; // Z is now initialized, and Z->p, Z->i, and Z->x are allocated ] } diff --git a/GraphBLAS/Source/GB_add_pending.c b/GraphBLAS/Source/GB_add_pending.c index c42ad3fca3..5754e9de27 100644 --- a/GraphBLAS/Source/GB_add_pending.c +++ b/GraphBLAS/Source/GB_add_pending.c @@ -93,7 +93,7 @@ GrB_Info GB_add_pending // add a pending tuple A(i,j) to a matrix if (A->npending == A->max_npending) { - int64_t newsize = IMAX (4 * A->max_npending, INITIAL_NPENDING_MAX) ; + int64_t newsize = IMAX (2 * A->max_npending, INITIAL_NPENDING_MAX) ; bool ok1 = true ; bool ok2 = true ; bool ok3 = true ; diff --git a/GraphBLAS/Source/GB_assign.c b/GraphBLAS/Source/GB_assign.c index a73b637c29..94afce3206 100644 --- a/GraphBLAS/Source/GB_assign.c +++ b/GraphBLAS/Source/GB_assign.c @@ -30,28 +30,12 @@ #include "GB.h" -#define FREE_ALL \ -{ \ - GB_FREE_MEMORY (I2) ; \ - GB_FREE_MEMORY (J2) ; \ - GB_MATRIX_FREE (&AT) ; \ - GB_MATRIX_FREE (&Mask2) ; \ -} - -// free workspace, put C in the queue, clear Mark, and block on C if needed -#define CLEANUP_AND_RETURN \ -{ \ - FREE_ALL ; \ - if (C->nzombies > 0) \ - { \ - /* make sure C is in the queue */ \ - GB_queue_insert (C) ; \ - } \ - /* clear the Mark array */ \ - GB_Mark_reset (1,0) ; \ - /* finalize C if blocking mode is enabled, and return result */ \ - ASSERT_OK (GB_check (C, "Final C for assign", 0)) ; \ - return (GB_block (C)) ; \ +#define FREE_ALL \ +{ \ + GB_FREE_MEMORY (I2, ni, sizeof (GrB_Index)) ; \ + GB_FREE_MEMORY (J2, nj, sizeof (GrB_Index)) ; \ + GB_MATRIX_FREE (&AT) ; \ + GB_MATRIX_FREE (&Mask2) ; \ } GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) @@ -60,7 +44,7 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) const bool C_replace, const GrB_Matrix Mask, // optional mask for C, unused if NULL const bool Mask_comp, - const GrB_BinaryOp accum, // optional accum for Z=accum(C,T) + const GrB_BinaryOp accum, // optional accum for accum(C,T) const GrB_Matrix A, // input matrix const bool A_transpose, const GrB_Index *I_in, // row indices @@ -210,15 +194,6 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) } } - //-------------------------------------------------------------------------- - // initialize workspace - //-------------------------------------------------------------------------- - - GrB_Index *I2 = NULL ; - GrB_Index *J2 = NULL ; - GrB_Matrix AT = NULL ; - GrB_Matrix Mask2 = NULL ; - //-------------------------------------------------------------------------- // quick return if an empty Mask is complemented //-------------------------------------------------------------------------- @@ -231,6 +206,10 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) // is true). However, the GrB_Row_assign and GrB_Col_assign only clear // their specific row or column of C, respectively. + // Mask is NULL so C and Mask cannot be the same, and A is ignored so + // it doesn't matter whether or not C == A. Thus C is not aliased + // to the inputs. 
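In the general path further down in this file's diff, C can be aliased with A or the Mask, and GB_subassign_kernel then operates on a duplicate Z of C that is transplanted back at the end (see the Z = C hunks below). A minimal sketch of that copy-then-commit idiom on plain arrays, purely illustrative and independent of the GraphBLAS data structures:

    /* Sketch of the alias-safe update pattern: if the output aliases an
       input, build the result in a scratch copy and commit it at the end. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* out [i] = in [n-1-i]: wrong if done in place without a copy */
    static int reverse_into (double *out, const double *in, size_t n)
    {
        double *work = out ;
        int aliased = (out == in) ;
        if (aliased)
        {
            work = malloc (n * sizeof (double)) ;       /* Z = scratch copy */
            if (work == NULL) return (-1) ;
        }
        for (size_t i = 0 ; i < n ; i++) work [i] = in [n-1-i] ;
        if (aliased)
        {
            memcpy (out, work, n * sizeof (double)) ;   /* commit Z into C */
            free (work) ;
        }
        return (0) ;
    }

    int main (void)
    {
        double a [4] = { 1, 2, 3, 4 } ;
        reverse_into (a, a, 4) ;        /* aliased call, like C == A */
        for (int i = 0 ; i < 4 ; i++) printf ("%g ", a [i]) ;   /* 4 3 2 1 */
        printf ("\n") ;
        return (0) ;
    }

The same trade-off noted in the hunks below applies: the copy is costly, but an aliased assignment of this kind already touches the whole matrix, so the extra copy does not change the overall cost by much.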
+ if (C_replace) { if (row_assign) @@ -293,7 +272,14 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) } } - CLEANUP_AND_RETURN ; + if (C->nzombies > 0) + { + // make sure C is in the queue + GB_queue_insert (C) ; + } + // finalize C if blocking mode is enabled, and return result + ASSERT_OK (GB_check (C, "Final C for assign, quick mask", 0)) ; + return (GB_block (C)) ; } //-------------------------------------------------------------------------- @@ -343,6 +329,8 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) // apply pending updates to A and Mask //-------------------------------------------------------------------------- + // if C == Mask or C == A, pending updates are applied to C as well + // delete any lingering zombies and assemble any pending tuples // but only in A and Mask, not C APPLY_PENDING_UPDATES (Mask) ; @@ -351,6 +339,15 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) APPLY_PENDING_UPDATES (A) ; } + //-------------------------------------------------------------------------- + // initialize workspace + //-------------------------------------------------------------------------- + + GrB_Index *I2 = NULL ; + GrB_Index *J2 = NULL ; + GrB_Matrix AT = NULL ; + GrB_Matrix Mask2 = NULL ; + //-------------------------------------------------------------------------- // scalar expansion: sort I and J and remove duplicates //-------------------------------------------------------------------------- @@ -493,11 +490,41 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) } //-------------------------------------------------------------------------- - // C(I,J) = A or accum (C(I,J),A) + // Z = C + //-------------------------------------------------------------------------- + + // GB_subassign_kernel modifies C efficiently in place, but it can only do + // so if C is not aliased with A2 or SubMask. If C is aliased a copy must + // be made. GB_subassign_kernel operates on the copy, Z, which is then + // transplanted back into C when done. This is costly, and can have + // performance implications, but it is the only reasonable method. If C is + // aliased, then the assignment is a large one and copying the whole matrix + // will not add much time. + + GrB_Matrix Z ; + bool aliased = (C == A2 || C == SubMask) ; + if (aliased) + { + // Z = duplicate of C + info = GB_Matrix_dup (&Z, C) ; + if (info != GrB_SUCCESS) + { + FREE_ALL ; + return (info) ; + } + } + else + { + // GB_subassign_kernel can safely operate on C in place + Z = C ; + } + + //-------------------------------------------------------------------------- + // Z(I,J) = A or accum (Z(I,J),A) //-------------------------------------------------------------------------- info = GB_subassign_kernel ( - C, C_replace, // C matrix and its descriptor + Z, C_replace, // Z matrix and its descriptor SubMask, Mask_comp, // Mask matrix and its descriptor accum, // for accum (C(I,J),A) A2, // A matrix, NULL for scalar expansion @@ -514,18 +541,19 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) // return if GB_subassign_kernel failed if (info != GrB_SUCCESS) { + if (aliased) GB_MATRIX_FREE (&Z) ; FREE_ALL ; return (info) ; } //-------------------------------------------------------------------------- - // examine C outside the C(I,J) submatrix + // examine Z outside the Z(I,J) submatrix //-------------------------------------------------------------------------- if (C_replace_phase) { // Let M be the mask operator as determined by the Mask matrix. 
If - // C_replace is true and M(i,j)=0 for any entry outside the C(I,J) + // C_replace is true and M(i,j)=0 for any entry outside the Z(I,J) // submatrix, then that entry must be deleted. This phase is very // costly but it is what the GraphBLAS Specification requires. // This phase is skipped if C_replace is false. @@ -534,30 +562,31 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) // or not). If the Mask is not present, then it is not complemented // (see the "quick return" case above). So if there is no Mask // matrix, M(I,J)=1 is true, so C_replace has no effect outside the - // C(I,J) submatrix. + // Z(I,J) submatrix. // Also, if IJ_whole_matrix is true, then there is nothing outside - // the C(I,J) submatrix to modify, so this phase is skipped if + // the Z(I,J) submatrix to modify, so this phase is skipped if // IJ_whole_matrix is true. + // This code assumes Z and Mask are not aliased to each other. + //---------------------------------------------------------------------- // assemble any pending tuples //---------------------------------------------------------------------- - if (PENDING (C)) + if (Z->npending > 0) { - info = GB_wait (C) ; + info = GB_wait (Z) ; if (info != GrB_SUCCESS) { + if (aliased) GB_MATRIX_FREE (&Z) ; FREE_ALL ; return (info) ; } } - // at this point, success is guaranteed - //---------------------------------------------------------------------- - // use Mark workspace to flag rows/cols inside the C(I,J) submatrix + // use Mark workspace to flag rows/cols inside the Z(I,J) submatrix //---------------------------------------------------------------------- int64_t flag = GB_Mark_reset (1,0) ; @@ -566,9 +595,9 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) if (I != GrB_ALL) { - // Mark_row has size C->nrows + // Mark_row has size Z->nrows Mark_row = Mark ; - Mark += C->nrows ; + Mark += Z->nrows ; for (int64_t k = 0 ; k < ni ; k++) { Mark_row [I [k]] = flag ; @@ -577,7 +606,7 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) if (J != GrB_ALL) { - // Mark_col has size C->ncols + // Mark_col has size Z->ncols Mark_col = Mark ; for (int64_t k = 0 ; k < nj ; k++) { @@ -586,11 +615,11 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) } //---------------------------------------------------------------------- - // get C and the Mask + // get Z and the Mask //---------------------------------------------------------------------- - const int64_t *Cp = C->p ; - int64_t *Ci = C->i ; + const int64_t *Zp = Z->p ; + int64_t *Zi = Z->i ; const int64_t *Maskp = Mask->p ; const int64_t *Maski = Mask->i ; @@ -600,36 +629,36 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) GB_cast_factory (GB_BOOL_code, Mask->type->code) ; //---------------------------------------------------------------------- - // delete entries outside C(I,J) for which M(i,j) is false + // delete entries outside Z(I,J) for which M(i,j) is false //---------------------------------------------------------------------- if (row_assign) { //------------------------------------------------------------------ - // row assignment, examine just C(i,:) + // row assignment, examine just Z(i,:) //------------------------------------------------------------------ - // GrB_Row_assign: only examine the row C(i,:) + // GrB_Row_assign: only examine the row Z(i,:) // Mask is a single row int64_t i = I [0] ; - for (int64_t j = 0 ; j < C->ncols ; j++) + for (int64_t j = 0 ; j < Z->ncols ; j++) { - // j_outside is true if column j is outside the C(I,J) submatrix + // j_outside is true if column j is outside the Z(I,J) 
submatrix bool j_outside = (Mark_col != NULL) && (Mark_col [j] < flag) ; if (j_outside) { - // find C(i,j) if it exists - int64_t p = Cp [j] ; - int64_t pright = Cp [j+1]-1 ; + // find Z(i,j) if it exists + int64_t p = Zp [j] ; + int64_t pright = Zp [j+1]-1 ; bool found, is_zombie ; - GB_BINARY_ZOMBIE (i, Ci, p, pright, found, C->nzombies, + GB_BINARY_ZOMBIE (i, Zi, p, pright, found, Z->nzombies, is_zombie) ; if (found && !is_zombie) { - // C(i,j) is a live entry not in the C(I,J) submatrix. + // Z(i,j) is a live entry not in the Z(I,J) submatrix. // Check the Mask(0,j) to see if it should be deleted. bool Mij = false ; int64_t pmask = Maskp [j] ; @@ -645,9 +674,9 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) } if (Mij == false) { - // delete C(i,j) by marking it as a zombie - C->nzombies++ ; - Ci [p] = FLIP (i) ; + // delete Z(i,j) by marking it as a zombie + Z->nzombies++ ; + Zi [p] = FLIP (i) ; } } } @@ -658,27 +687,27 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) { //------------------------------------------------------------------ - // column assignment, examine just C(:,j) + // column assignment, examine just Z(:,j) //------------------------------------------------------------------ // Mask is a single column int64_t j = J [0] ; - for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + for (int64_t p = Zp [j] ; p < Zp [j+1] ; p++) { - // C(i,j) is outside the C(I,j) subcolumn if either i is + // Z(i,j) is outside the Z(I,j) subcolumn if either i is // not in the list I - int64_t i = Ci [p] ; + int64_t i = Zi [p] ; if (i < 0) { - // C(i,j) is already a zombie; skip it. + // Z(i,j) is already a zombie; skip it. continue ; } bool i_outside = (Mark_row != NULL) && (Mark_row [i] < flag) ; if (i_outside) { - // C(i,j) is a live entry not in the C(I,j) subcolumn. + // Z(i,j) is a live entry not in the Z(I,j) subcolumn. // Check the Mask to see if it should be deleted. bool Mij ; int64_t pleft = Maskp [0] ; @@ -702,9 +731,9 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) } if (Mij == false) { - // delete C(i,j) by marking it as a zombie - C->nzombies++ ; - Ci [p] = FLIP (i) ; + // delete Z(i,j) by marking it as a zombie + Z->nzombies++ ; + Zi [p] = FLIP (i) ; } } } @@ -713,23 +742,23 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) { //------------------------------------------------------------------ - // Matrix/vector assignment: examine all of C + // Matrix/vector assignment: examine all of Z //------------------------------------------------------------------ - // Mask has the same size as C - for (int64_t j = 0 ; j < C->ncols ; j++) + // Mask has the same size as Z + for (int64_t j = 0 ; j < Z->ncols ; j++) { - // j_outside is true if column j is outside the C(I,J) submatrix + // j_outside is true if column j is outside the Z(I,J) submatrix bool j_outside = (Mark_col != NULL) && (Mark_col [j] < flag) ; - for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + for (int64_t p = Zp [j] ; p < Zp [j+1] ; p++) { - // C(i,j) is outside the C(I,J) submatrix if either i is + // Z(i,j) is outside the Z(I,J) submatrix if either i is // not in the list I, or j is not in J, or both. - int64_t i = Ci [p] ; + int64_t i = Zi [p] ; if (i < 0) { - // C(i,j) is already a zombie; skip it. + // Z(i,j) is already a zombie; skip it. continue ; } bool i_outside = (Mark_row != NULL) @@ -737,7 +766,7 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) if (j_outside || i_outside) { - // C(i,j) is a live entry not in the C(I,J) submatrix. + // Z(i,j) is a live entry not in the Z(I,J) submatrix. 
// Check the Mask to see if it should be deleted. bool Mij ; int64_t pleft = Maskp [j] ; @@ -761,23 +790,64 @@ GrB_Info GB_assign // C(I,J) = accum (C(I,J),A) } if (Mij == false) { - // delete C(i,j) by marking it as a zombie - C->nzombies++ ; - Ci [p] = FLIP (i) ; + // delete Z(i,j) by marking it as a zombie + Z->nzombies++ ; + Zi [p] = FLIP (i) ; } } } } } + + // clear the Mark array + GB_Mark_reset (1,0) ; + } + + // free workspace + FREE_ALL ; + + //-------------------------------------------------------------------------- + // C = Z + //-------------------------------------------------------------------------- + + if (aliased) + { + // zombies can be transplanted into C but pending tuples cannot + if (Z->npending > 0) + { + // assemble all pending tuples, and delete all zombies too + info = GB_wait (Z) ; + } + if (info == GrB_SUCCESS) + { + // transplants the content of Z into C and frees Z. + // this always succeeds since nothing gets allocated. + info = GB_Matrix_transplant (C, C->type, &Z) ; + ASSERT (info == GrB_SUCCESS) ; + } + if (info != GrB_SUCCESS) + { + // Z needs to be freed if C is aliased but info != GrB_SUCCESS. + // C remains unchanged. + GB_MATRIX_FREE (&Z) ; + return (info) ; + } } //-------------------------------------------------------------------------- // cleanup //-------------------------------------------------------------------------- - CLEANUP_AND_RETURN ; + if (C->nzombies > 0) + { + // make sure C is in the queue + GB_queue_insert (C) ; + } + + // finalize C if blocking mode is enabled, and return result + ASSERT_OK (GB_check (C, "Final C for assign", 0)) ; + return (GB_block (C)) ; } #undef FREE_ALL -#undef CLEANUP_AND_RETURN diff --git a/GraphBLAS/Source/GB_build.c b/GraphBLAS/Source/GB_build.c index d05f0f1188..c6e41ab200 100644 --- a/GraphBLAS/Source/GB_build.c +++ b/GraphBLAS/Source/GB_build.c @@ -187,8 +187,8 @@ GrB_Info GB_build // check inputs then build matrix if (!ok) { // out of memory - GB_FREE_MEMORY (iwork) ; - GB_FREE_MEMORY (jwork) ; + GB_FREE_MEMORY (iwork, len, sizeof (int64_t)) ; + GB_FREE_MEMORY (jwork, len, sizeof (int64_t)) ; return (ERROR (GrB_OUT_OF_MEMORY, (LOG, "out of memory, %g GBytes required", memory))) ; } @@ -219,8 +219,8 @@ GrB_Info GB_build // check inputs then build matrix if (out_of_bounds) { // invalid index - GB_FREE_MEMORY (iwork) ; - GB_FREE_MEMORY (jwork) ; + GB_FREE_MEMORY (iwork, len, sizeof (int64_t)) ; + GB_FREE_MEMORY (jwork, len, sizeof (int64_t)) ; return (ERROR (GrB_INDEX_OUT_OF_BOUNDS, (LOG, "index ("GBu","GBu") out of bounds," " must be < ("GBd", "GBd")", @@ -258,8 +258,8 @@ GrB_Info GB_build // check inputs then build matrix if (out_of_bounds) { // invalid index - GB_FREE_MEMORY (iwork) ; - GB_FREE_MEMORY (jwork) ; + GB_FREE_MEMORY (iwork, len, sizeof (int64_t)) ; + GB_FREE_MEMORY (jwork, len, sizeof (int64_t)) ; return (ERROR (GrB_INDEX_OUT_OF_BOUNDS, (LOG, "index ("GBu") out of bounds, must be < ("GBd")", I [k], nrows))) ; @@ -289,7 +289,7 @@ GrB_Info GB_build // check inputs then build matrix if (C->type == dup->ztype) { // construct C directly; this is the fastest option - info = GB_builder (C, &iwork, &jwork, sorted, X, len, dup, X_code) ; + info = GB_builder (C, &iwork, &jwork, sorted, X, len, len, dup, X_code) ; ASSERT (iwork == NULL) ; ASSERT (jwork == NULL) ; ASSERT (info == GrB_SUCCESS || info == GrB_OUT_OF_MEMORY) ; @@ -302,14 +302,14 @@ GrB_Info GB_build // check inputs then build matrix GB_NEW (&T, dup->ztype, C->nrows, C->ncols, false, true) ; if (info != GrB_SUCCESS) { - GB_FREE_MEMORY 
(iwork) ; - GB_FREE_MEMORY (jwork) ; + GB_FREE_MEMORY (iwork, len, sizeof (int64_t)) ; + GB_FREE_MEMORY (jwork, len, sizeof (int64_t)) ; ASSERT (info == GrB_OUT_OF_MEMORY) ; return (info) ; } // build T from the tuples - info = GB_builder (T, &iwork, &jwork, sorted, X, len, dup, X_code) ; + info = GB_builder (T, &iwork, &jwork, sorted, X, len, len, dup, X_code) ; ASSERT (iwork == NULL) ; ASSERT (jwork == NULL) ; if (info != GrB_SUCCESS) diff --git a/GraphBLAS/Source/GB_build_factory.c b/GraphBLAS/Source/GB_build_factory.c index 9370d4b361..c580d78d66 100644 --- a/GraphBLAS/Source/GB_build_factory.c +++ b/GraphBLAS/Source/GB_build_factory.c @@ -49,7 +49,8 @@ GrB_Info GB_build_factory // build a matrix int64_t **iwork_handle, // for (i,k) or (j,i,k) tuples int64_t **kwork_handle, // for (i,k) or (j,i,k) tuples const void *X, // array of values of tuples - const int64_t len, // number of tuples + const int64_t len, // number of tuples and size of kwork + const int64_t ilen, // size of iwork array const GrB_BinaryOp dup, // binary function to assemble duplicates, // if NULL use the "SECOND" function to // keep the most recent duplicate. @@ -90,8 +91,8 @@ GrB_Info GB_build_factory // build a matrix if (C->x == NULL) { // out of memory - GB_FREE_MEMORY (*kwork_handle) ; - GB_FREE_MEMORY (*iwork_handle) ; + GB_FREE_MEMORY (*kwork_handle, len, sizeof (int64_t)) ; + GB_FREE_MEMORY (*iwork_handle, ilen, sizeof (int64_t)) ; GB_Matrix_clear ((GrB_Matrix) C) ; return (ERROR (GrB_OUT_OF_MEMORY, (LOG, "out of memory, %g GBytes required", memory))) ; @@ -302,7 +303,7 @@ GrB_Info GB_build_factory // build a matrix // phases, kwork is part of the symbolic analysis and should be kept for // subsequent builds with the same I and J but different X. - GB_FREE_MEMORY (*kwork_handle) ; + GB_FREE_MEMORY (*kwork_handle, len, sizeof (int64_t)) ; kwork = NULL ; //-------------------------------------------------------------------------- @@ -310,11 +311,11 @@ GrB_Info GB_build_factory // build a matrix //-------------------------------------------------------------------------- // shrink iwork from size len to size C->nzmax - if (C->nzmax < len) + if (C->nzmax < ilen) { // this cannot fail since the size is shrinking. bool ok ; - GB_REALLOC_MEMORY (iwork, C->nzmax, len, sizeof (int64_t), &ok) ; + GB_REALLOC_MEMORY (iwork, C->nzmax, ilen, sizeof (int64_t), &ok) ; ASSERT (ok) ; } C->i = iwork ; diff --git a/GraphBLAS/Source/GB_builder.c b/GraphBLAS/Source/GB_builder.c index 0111dae2e1..38cbe5228e 100644 --- a/GraphBLAS/Source/GB_builder.c +++ b/GraphBLAS/Source/GB_builder.c @@ -19,6 +19,7 @@ GrB_Info GB_builder const bool already_sorted, // true if tuples already sorted on input const void *X, // array of values of tuples const int64_t len, // number of tuples + const int64_t ijlen, // size of i,j work arrays const GrB_BinaryOp dup, // binary function to assemble duplicates, // if NULL use the "SECOND" function to // keep the most recent duplicate. @@ -74,8 +75,8 @@ GrB_Info GB_builder GB_MALLOC_MEMORY (kwork, len, sizeof (int64_t)) ; if (kwork == NULL) { - GB_FREE_MEMORY (*iwork_handle) ; - GB_FREE_MEMORY (*jwork_handle) ; + GB_FREE_MEMORY (*iwork_handle, ijlen, sizeof (int64_t)) ; + GB_FREE_MEMORY (*jwork_handle, ijlen, sizeof (int64_t)) ; GB_Matrix_clear (C) ; return (ERROR (GrB_OUT_OF_MEMORY, (LOG, "out of memory, %g GBytes required", @@ -266,7 +267,7 @@ GrB_Info GB_builder // one column. But the jwork_handle itself is always non-NULL. 
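For context on the numerical phase that GB_build_factory performs next: tuples that land on the same row and column are combined with the dup operator, or the most recent one wins when dup defaults to SECOND. A minimal sketch over already-sorted tuples of a single column, assuming double values and a plus dup; all names are illustrative:

    /* Sketch: assemble duplicates from sorted tuples of one column.
       Tuples (I [k], X [k]) are sorted by row index; equal rows are
       combined with a "dup" operator (here: plus). */
    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
        int64_t I [ ] = { 0, 3, 3, 3, 7 } ;     /* sorted row indices */
        double  X [ ] = { 1, 2, 4, 8, 5 } ;     /* values of the tuples */
        int64_t len = 5, nz = 0 ;
        int64_t Ci [5] ; double Cx [5] ;
        for (int64_t k = 0 ; k < len ; k++)
        {
            if (nz > 0 && Ci [nz-1] == I [k])
            {
                Cx [nz-1] += X [k] ;            /* dup: combine duplicates */
            }
            else
            {
                Ci [nz] = I [k] ; Cx [nz] = X [k] ; nz++ ;
            }
        }
        for (int64_t k = 0 ; k < nz ; k++)
        {
            printf ("row %lld: %g\n", (long long) Ci [k], Cx [k]) ;
        }
        /* prints: row 0: 1, row 3: 14, row 7: 5 */
        return (0) ;
    }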
ASSERT (jwork_handle != NULL) ; - GB_FREE_MEMORY (*jwork_handle) ; + GB_FREE_MEMORY (*jwork_handle, ijlen, sizeof (int64_t)) ; //-------------------------------------------------------------------------- // numerical phase of the build via switch factory or generic workers @@ -283,6 +284,6 @@ GrB_Info GB_builder // input tuples in I and J, also of size len (len == nvals), and it must // also construct the column pointers C->p. - return (GB_build_factory (C, iwork_handle, &kwork, X, len, dup, X_code)) ; + return (GB_build_factory (C, iwork_handle, &kwork, X, len, ijlen, dup, X_code)) ; } diff --git a/GraphBLAS/Source/GB_calloc_memory.c b/GraphBLAS/Source/GB_calloc_memory.c index 597d0c5342..baf65a5bbb 100644 --- a/GraphBLAS/Source/GB_calloc_memory.c +++ b/GraphBLAS/Source/GB_calloc_memory.c @@ -29,6 +29,7 @@ void *GB_calloc_memory // pointer to allocated block of memory void *p ; size_t size ; + int nmalloc ; // make sure at least one item is allocated nitems = IMAX (1, nitems) ; @@ -45,36 +46,43 @@ void *GB_calloc_memory // pointer to allocated block of memory else { - if (GB_thread_local.malloc_debug) + // check the malloc debug status. This debug flag is set outside + // of GraphBLAS and not modified, so it is safe to check it outside + // a critical section. + bool pretend_to_fail = false ; + if (GB_Global.malloc_debug) { // brutal malloc debug; pretend to fail if the count <= 0 - if (GB_thread_local.malloc_debug_count <= 0) + #pragma omp critical (GB_memory) { - return (false) ; + pretend_to_fail = (GB_Global.malloc_debug_count-- <= 0) ; } } - p = (void *) CALLOC (nitems, size_of_item) ; + if (pretend_to_fail) + { + p = NULL ; + } + else + { + p = (void *) CALLOC (nitems, size_of_item) ; + } if (p != NULL) { - GB_thread_local.nmalloc++ ; + #pragma omp critical (GB_memory) + { + nmalloc = ++GB_Global.nmalloc ; + GB_Global.inuse += nitems * size_of_item ; + GB_Global.maxused = IMAX (GB_Global.maxused, GB_Global.inuse) ; + } #ifdef PRINT_MALLOC printf ("calloc: %14p %3d %1d n "GBd" size "GBd"\n", - p, - (int) GB_thread_local.nmalloc, - GB_thread_local.malloc_debug, + p, nmalloc, GB_Global.malloc_debug, (int64_t) nitems, (int64_t) size_of_item) ; #endif - - // a malloc has been used up - if (GB_thread_local.malloc_debug) - { - GB_thread_local.malloc_debug_count-- ; - } } - } return (p) ; } diff --git a/GraphBLAS/Source/GB_free_memory.c b/GraphBLAS/Source/GB_free_memory.c index 759d916d56..2910f2ee5f 100644 --- a/GraphBLAS/Source/GB_free_memory.c +++ b/GraphBLAS/Source/GB_free_memory.c @@ -14,27 +14,37 @@ #include "GB.h" -void GB_free_memory // pointer to allocated block of memory to free +void GB_free_memory ( - void *p + void *p, // pointer to allocated block of memory to free + size_t nitems, // number of items to free + size_t size_of_item // sizeof each item ) { if (p != NULL) { - GB_thread_local.nmalloc-- ; + // at least one item is always allocated + nitems = IMAX (1, nitems) ; + int nmalloc ; + + #pragma omp critical (GB_memory) + { + nmalloc = --GB_Global.nmalloc ; + GB_Global.inuse -= nitems * size_of_item ; + } #ifdef PRINT_MALLOC - printf ("free: %14p %3d %1d\n", - p, - (int) GB_thread_local.nmalloc, - GB_thread_local.malloc_debug) ; - if (GB_thread_local.nmalloc < 0 ) - printf (GBd " free %p negative mallocs!\n", - GB_thread_local.nmalloc, p) ; + printf ("free: %14p %3d %1d n "GBd" size "GBd"\n", + p, nmalloc, GB_Global.malloc_debug, + (int64_t) nitems, (int64_t) size_of_item) ; + if (nmalloc < 0) + { + printf ("%d free %p negative mallocs!\n", nmalloc, p) ; + } #endif FREE (p) 
; - ASSERT (GB_thread_local.nmalloc >= 0) ; + ASSERT (nmalloc >= 0) ; } } diff --git a/GraphBLAS/Source/GB_free_pending.c b/GraphBLAS/Source/GB_free_pending.c index 5ba8e29856..d930775674 100644 --- a/GraphBLAS/Source/GB_free_pending.c +++ b/GraphBLAS/Source/GB_free_pending.c @@ -25,9 +25,9 @@ void GB_free_pending // free all pending tuples // free all pending tuples //-------------------------------------------------------------------------- - GB_FREE_MEMORY (A->ipending) ; - GB_FREE_MEMORY (A->jpending) ; - GB_FREE_MEMORY (A->xpending) ; + GB_FREE_MEMORY (A->ipending, A->max_npending, sizeof (int64_t)) ; + GB_FREE_MEMORY (A->jpending, A->max_npending, sizeof (int64_t)) ; + GB_FREE_MEMORY (A->xpending, A->max_npending, A->type->size) ; A->npending = 0 ; A->max_npending = 0 ; A->sorted_pending = true ; diff --git a/GraphBLAS/Source/GB_malloc_memory.c b/GraphBLAS/Source/GB_malloc_memory.c index 3d125243f4..d75804a9d2 100644 --- a/GraphBLAS/Source/GB_malloc_memory.c +++ b/GraphBLAS/Source/GB_malloc_memory.c @@ -29,6 +29,7 @@ void *GB_malloc_memory // pointer to allocated block of memory void *p ; size_t size ; + int nmalloc ; // make sure at least one item is allocated nitems = IMAX (1, nitems) ; @@ -45,36 +46,44 @@ void *GB_malloc_memory // pointer to allocated block of memory else { - if (GB_thread_local.malloc_debug) + // check the malloc debug status. This debug flag is set outside + // of GraphBLAS and not modified, so it is safe to check it outside + // a critical section. + bool pretend_to_fail = false ; + if (GB_Global.malloc_debug) { // brutal malloc debug; pretend to fail if the count <= 0 - if (GB_thread_local.malloc_debug_count <= 0) + #pragma omp critical (GB_memory) { - return (false) ; + pretend_to_fail = (GB_Global.malloc_debug_count-- <= 0) ; } } - p = (void *) MALLOC (size) ; + if (pretend_to_fail) + { + p = NULL ; + } + else + { + p = (void *) MALLOC (size) ; + } if (p != NULL) { - GB_thread_local.nmalloc++ ; -#ifdef PRINT_MALLOC - printf ("malloc: %14p %3d %1d n "GBu" size "GBu"\n", - p, - (int) GB_thread_local.nmalloc, - GB_thread_local.malloc_debug, - nitems, size_of_item) ; -#endif - - // a malloc has been used up - if (GB_thread_local.malloc_debug) + #pragma omp critical (GB_memory) { - GB_thread_local.malloc_debug_count-- ; + nmalloc = ++GB_Global.nmalloc ; + GB_Global.inuse += nitems * size_of_item ; + GB_Global.maxused = IMAX (GB_Global.maxused, GB_Global.inuse) ; } - } +#ifdef PRINT_MALLOC + printf ("malloc: %14p %3d %1d n "GBd" size "GBd"\n", + p, nmalloc, GB_Global.malloc_debug, + (int64_t) nitems, (int64_t) size_of_item) ; +#endif + } } return (p) ; } diff --git a/GraphBLAS/Source/GB_mxm.c b/GraphBLAS/Source/GB_mxm.c index 34186b43e3..631cdba2ce 100644 --- a/GraphBLAS/Source/GB_mxm.c +++ b/GraphBLAS/Source/GB_mxm.c @@ -134,15 +134,23 @@ GrB_Info GB_mxm // C = A*B && (Mask == NULL || (Mask != NULL && mask_applied)) && (C_replace || NNZ (C) == 0)) { + // C = 0 ; C = (ctype) T // The Mask (if any) has already been applied in GB_Matrix_multiply. - // If C is empty, or to be cleared anyway, and if accum is not present, - // then C = (ctype) T, typecasting if needed. If no typecasting is - // done then this takes no time at all and is a pure transplant. + // If C is also empty, or to be cleared anyway, and if accum is not + // present, then T can be transplanted directly into C, as C = (ctype) + // T, typecasting if needed. If no typecasting is done then this takes + // no time at all and is a pure transplant. 
If T has zombies then they + // are safely transplanted into C, and are left in the final result, C. + ASSERT (ZOMBIES_OK (T)) ; return (GB_Matrix_transplant (C, C->type, &T)) ; } else { // C = accum (C,T) + // T may have zombies from the masked multiply, so delete them now. + ASSERT (ZOMBIES_OK (T)) ; + APPLY_PENDING_UPDATES (T) ; + ASSERT (!ZOMBIES (T)) ; return (GB_accum_mask (C, Mask, accum, &T, C_replace, Mask_comp)) ; } } diff --git a/GraphBLAS/Source/GB_new.c b/GraphBLAS/Source/GB_new.c index aff0521d5e..a2e5cc1de3 100644 --- a/GraphBLAS/Source/GB_new.c +++ b/GraphBLAS/Source/GB_new.c @@ -312,7 +312,7 @@ GrB_Info GB_new // create a new matrix if ((Ap_calloc || Ap_malloc) && A->p == NULL) { // out of memory - GB_FREE_MEMORY (*matrix_handle) ; + GB_FREE_MEMORY (*matrix_handle, 1, sizeof (GB_Matrix_opaque)) ; return (ERROR (GrB_OUT_OF_MEMORY, (LOG, "out of memory, %g GBytes required", GBYTES (A->ncols+1, sizeof (int64_t))))) ; diff --git a/GraphBLAS/Source/GB_object_check.c b/GraphBLAS/Source/GB_object_check.c index 2ef558ab3f..5c74ff9ac0 100644 --- a/GraphBLAS/Source/GB_object_check.c +++ b/GraphBLAS/Source/GB_object_check.c @@ -193,7 +193,7 @@ GrB_Info GB_object_check // check a GraphBLAS matrix for (int64_t p = A->p [j] ; p < A->p [j+1] ; p++) { int64_t i = A->i [p] ; - bool is_zombie = IS_FLIPPED (i) ; + bool is_zombie = IS_ZOMBIE (i) ; i = UNFLIP (i) ; if (is_zombie) nzombies++ ; if (prcol) diff --git a/GraphBLAS/Source/GB_queue_check.c b/GraphBLAS/Source/GB_queue_check.c index 01a9f26441..a644523a4d 100644 --- a/GraphBLAS/Source/GB_queue_check.c +++ b/GraphBLAS/Source/GB_queue_check.c @@ -19,7 +19,7 @@ void GB_queue_check ) { - #pragma omp critical GB_queue + #pragma omp critical (GB_queue) { // get the status of the queue for this matrix (*head) = (GrB_Matrix) (GB_Global.queue_head) ; diff --git a/GraphBLAS/Source/GB_queue_init.c b/GraphBLAS/Source/GB_queue_init.c deleted file mode 100644 index 493733414e..0000000000 --- a/GraphBLAS/Source/GB_queue_init.c +++ /dev/null @@ -1,49 +0,0 @@ -//------------------------------------------------------------------------------ -// GB_queue_init: initialize the queue -//------------------------------------------------------------------------------ - -// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. -// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. - -//------------------------------------------------------------------------------ - -#include "GB.h" - -//------------------------------------------------------------------------------ -// All Global storage is declared and initialized here -//------------------------------------------------------------------------------ - -// If the user creates threads that work on GraphBLAS matrices, then all of -// those threads must share the same matrix queue, and the same mode. 
- -GB_Global_struct GB_Global = -{ - - // queued matrices with work to do - .queue_head = NULL, // pointer to first queued matrix - - // GraphBLAS mode - .mode = GrB_NONBLOCKING, // default is nonblocking - -} ; - -//------------------------------------------------------------------------------ -// GB_queue_init -//------------------------------------------------------------------------------ - -void GB_queue_init -( - const GrB_Mode mode // blocking or non-blocking mode -) -{ - - #pragma omp critical GB_queue - { - // clear the queue - GB_Global.queue_head = NULL ; - - // set the mode: blocking or nonblocking - GB_Global.mode = mode ; // default is non-blocking - } -} - diff --git a/GraphBLAS/Source/GB_queue_insert.c b/GraphBLAS/Source/GB_queue_insert.c index e006105b47..ef49a4b949 100644 --- a/GraphBLAS/Source/GB_queue_insert.c +++ b/GraphBLAS/Source/GB_queue_insert.c @@ -32,30 +32,19 @@ void GB_queue_insert // insert matrix at the head of queue if ((A->npending > 0 || A->nzombies > 0) && !(A->enqueued)) { // A is not in the queue yet, but needs to be there - - #pragma omp critical GB_queue + #pragma omp critical (GB_queue) { - - // GraphBLAS is not (yet) parallel, but the user application might - // be. This update to the global queue must be done in a critical - // section. If both GraphBLAS and the user application are - // compiled with OpenMP, then the #pragma will protect the queue - // from a race condition of simulateneous updates. - + // check again to be safe, then add A to the head of the queue if ((A->npending > 0 || A->nzombies > 0) && !(A->enqueued)) { - - // check the condition again, inside the critical section, - // just to be safe - // add the matrix to the head of the queue - GrB_Matrix head = (GrB_Matrix) (GB_Global.queue_head) ; - A->queue_next = head ; + GrB_Matrix Head = (GrB_Matrix) (GB_Global.queue_head) ; + A->queue_next = Head ; A->queue_prev = NULL ; A->enqueued = true ; - if (head != NULL) + if (Head != NULL) { - head->queue_prev = A ; + Head->queue_prev = A ; } GB_Global.queue_head = A ; } diff --git a/GraphBLAS/Source/GB_queue_remove.c b/GraphBLAS/Source/GB_queue_remove.c index 47e8880d79..1aa047eb43 100644 --- a/GraphBLAS/Source/GB_queue_remove.c +++ b/GraphBLAS/Source/GB_queue_remove.c @@ -22,49 +22,35 @@ void GB_queue_remove // remove matrix from queue ASSERT (A != NULL) ; //-------------------------------------------------------------------------- - // remove the matrix from the queue + // remove the matrix from the queue, if it is in the queue //-------------------------------------------------------------------------- if (A->enqueued) { // remove the matrix from the queue - - #pragma omp critical GB_queue + #pragma omp critical (GB_queue) { - - // GraphBLAS is not (yet) parallel, but the user application might - // be. This update to the global queue must be done in a critical - // section. If both GraphBLAS and the user application are - // compiled with OpenMP, then the #pragma will protect the queue - // from a race condition of simulateneous updates. - + // check again to be safe, and remove A from the queue if (A->enqueued) { - // check the condition again, since GrB_wait could have been - // called by another thread, which removes all matrices from - // the queue, including this one. 
- - void *prev = A->queue_prev ; - void *next = A->queue_next ; - if (prev == NULL) + GrB_Matrix Prev = (GrB_Matrix) (A->queue_prev) ; + GrB_Matrix Next = (GrB_Matrix) (A->queue_next) ; + if (Prev == NULL) { // matrix is at the head of the queue; update the head - GB_Global.queue_head = next ; + GB_Global.queue_head = Next ; } else { // matrix is not the first in the queue - GrB_Matrix Prev = (GrB_Matrix) prev ; - Prev->queue_next = next ; + Prev->queue_next = Next ; } - if (next != NULL) + if (Next != NULL) { // update the previous link of the next matrix, if any - GrB_Matrix Next = (GrB_Matrix) next ; - Next->queue_prev = prev ; + Next->queue_prev = Prev ; } - - // matrix has been removed from the queue + // A has been removed from the queue A->queue_prev = NULL ; A->queue_next = NULL ; A->enqueued = false ; diff --git a/GraphBLAS/Source/GB_queue_remove_head.c b/GraphBLAS/Source/GB_queue_remove_head.c index de9bafb6ab..3c0300cb84 100644 --- a/GraphBLAS/Source/GB_queue_remove_head.c +++ b/GraphBLAS/Source/GB_queue_remove_head.c @@ -18,26 +18,23 @@ GrB_Matrix GB_queue_remove_head ( ) // return matrix or NULL if queue empty GrB_Matrix A = NULL ; - #pragma omp critical GB_queue + #pragma omp critical (GB_queue) { - - // GraphBLAS is not (yet) parallel, but the user application might - // be. This update to the global queue must be done in a critical - // section. If both GraphBLAS and the user application are - // compiled with OpenMP, then the #pragma will protect the queue - // from a race condition of simulateneous updates. - // get the matrix at the head of the queue A = (GrB_Matrix) (GB_Global.queue_head) ; - - // remove it from the queue + // remove A from the queue, if it exists if (A != NULL) { - // shift the head to the next matrix in the queue - GB_Global.queue_head = A->queue_next ; - - // mark this matrix has no longer in the queue + ASSERT (A->enqueued) ; ASSERT (A->queue_prev == NULL) ; + // shift the head to the next matrix in the queue + GrB_Matrix Next = (GrB_Matrix) A->queue_next ; + GB_Global.queue_head = Next ; + if (Next != NULL) + { + Next->queue_prev = NULL ; + } + // A has been removed from the queue A->queue_next = NULL ; A->enqueued = false ; } diff --git a/GraphBLAS/Source/GB_realloc_memory.c b/GraphBLAS/Source/GB_realloc_memory.c index 6984c1b99f..01cfd2438d 100644 --- a/GraphBLAS/Source/GB_realloc_memory.c +++ b/GraphBLAS/Source/GB_realloc_memory.c @@ -45,6 +45,7 @@ void *GB_realloc_memory // pointer to reallocated block of memory, or { size_t size ; + int nmalloc ; // make sure at least one item is allocated nitems_old = IMAX (1, nitems_old) ; @@ -76,8 +77,20 @@ void *GB_realloc_memory // pointer to reallocated block of memory, or // change the size of the object from nitems_old to nitems_new void *pnew ; - if (GB_thread_local.malloc_debug && - GB_thread_local.malloc_debug_count <= 0) + // check the malloc debug status. This debug flag is set outside + // of GraphBLAS and not modified, so it is safe to check it outside + // a critical section. 
+ bool pretend_to_fail = false ;
+ if (GB_Global.malloc_debug)
+ {
+ // brutal malloc debug; pretend to fail if the count <= 0
+ #pragma omp critical (GB_memory)
+ {
+ pretend_to_fail = (GB_Global.malloc_debug_count-- <= 0) ;
+ }
+ }
+
+ if (pretend_to_fail)
 {
 // brutal malloc debug; pretend to fail if the count <= 0,
 pnew = NULL ;
@@ -87,41 +100,40 @@ void *GB_realloc_memory // pointer to reallocated block of memory, or
 pnew = (void *) REALLOC (p, size) ;
 }
-#ifdef PRINT_MALLOC
- printf ("realloc: %14p %3d %1d n "GBu" -> "GBu" size "GBu"\n",
- pnew,
- (int) GB_thread_local.nmalloc,
- GB_thread_local.malloc_debug,
- nitems_old, nitems_new, size_of_item) ;
-#endif
-
- if (pnew == NULL)
+ #pragma omp critical (GB_memory)
 {
- if (nitems_new < nitems_old)
+ if (pnew == NULL)
 {
- // the attempt to reduce the size of the block failed, but
- // the old block is unchanged. So pretend to succeed.
- (*ok) = true ;
+ if (nitems_new < nitems_old)
+ {
+ // the attempt to reduce the size of the block failed, but
+ // the old block is unchanged. So pretend to succeed.
+ (*ok) = true ;
+ GB_Global.inuse -= (nitems_old - nitems_new) * size_of_item;
+ }
+ else
+ {
+ // out of memory
+ (*ok) = false ;
+ }
 }
 else
 {
- // out of memory
- (*ok) = false ;
+ // success
+ p = pnew ;
+ (*ok) = true ;
+ GB_Global.inuse += (nitems_new - nitems_old) * size_of_item ;
+ GB_Global.maxused = IMAX (GB_Global.maxused, GB_Global.inuse) ;
 }
+ nmalloc = GB_Global.nmalloc ;
 }
- else
- {
- // success
- p = pnew ;
- (*ok) = true ;
- // a malloc has been used up if the size has increased
- if (nitems_new > nitems_old && GB_thread_local.malloc_debug)
- {
- GB_thread_local.malloc_debug_count-- ;
- }
+#ifdef PRINT_MALLOC
+ printf ("realloc: %14p %3d %1d n "GBd" -> "GBd" size "GBd"\n",
+ pnew, nmalloc, GB_Global.malloc_debug, (int64_t) nitems_old,
+ (int64_t) nitems_new, (int64_t) size_of_item) ;
+#endif
-
- }
 }
 return (p) ;
 }
diff --git a/GraphBLAS/Source/GB_reduce_to_column.c b/GraphBLAS/Source/GB_reduce_to_column.c
index ddb7858954..c94d6db72c 100644
--- a/GraphBLAS/Source/GB_reduce_to_column.c
+++ b/GraphBLAS/Source/GB_reduce_to_column.c
@@ -140,6 +140,8 @@ GrB_Info GB_reduce_to_column // w = accum (w,reduce(A))
 ASSERT (!PENDING (T)) ; ASSERT (!ZOMBIES (T)) ;
 ASSERT (!PENDING (A)) ; ASSERT (!ZOMBIES (A)) ;
+ // FUTURE: this function could easily tolerate zombies in A
+
 //--------------------------------------------------------------------------
 // scalar workspace
 //--------------------------------------------------------------------------
diff --git a/GraphBLAS/Source/GB_reduce_to_scalar.c b/GraphBLAS/Source/GB_reduce_to_scalar.c
index e829c07ab9..aaeef6436d 100644
--- a/GraphBLAS/Source/GB_reduce_to_scalar.c
+++ b/GraphBLAS/Source/GB_reduce_to_scalar.c
@@ -9,7 +9,9 @@
 // c = accum (c, reduce_to_scalar(A)), reduce entries in a matrix
 // to a scalar. Not user-callable. Does the work for GrB_*_reduce_TYPE,
-// both matrix and vector.
+// both matrix and vector. This function tolerates zombies and does not
+// delete them. It does not tolerate pending tuples, so if they are present,
+// all zombies are deleted and all pending tuples are assembled.
 #include "GB.h"
@@ -27,9 +29,21 @@ GrB_Info GB_reduce_to_scalar // twork = reduce_to_scalar (A)
 // check inputs
 //--------------------------------------------------------------------------
- // delete any lingering zombies and assemble any pending tuples
- // (required by Table 2.4 of the spec)
- APPLY_PENDING_UPDATES (A) ;
+ // Zombies are an opaque internal detail of the GrB_Matrix data structure
+ // that do not depend on anything outside the matrix. Thus, Table 2.4 of
+ // the GraphBLAS spec, version 1.1.0, does not require their deletion.
+ // Pending tuples are different, since they rely on another object outside
+ // the matrix: the pending operator, which might be user-defined. Per
+ // Table 2.4, the user can expect that GrB_reduce applies the pending
+ // operator, which can then be deleted by the user. Thus, if the pending
+ // operator is user-defined it must be applied here. Assembling pending
+ // tuples requires zombies to be deleted first. Note that if the pending
+ // operator is built-in, then the updates could in principle be skipped,
+ // but this could be done only if the reduce monoid is the same as the
+ // pending operator.
+
+ if (PENDING (A)) APPLY_PENDING_UPDATES (A) ;
+ ASSERT (ZOMBIES_OK (A)) ; // Zombies are tolerated, and not deleted
 RETURN_IF_NULL_OR_UNINITIALIZED (reduce) ;
 RETURN_IF_UNINITIALIZED (accum) ;
 RETURN_IF_NULL (c) ;
@@ -63,7 +77,7 @@ GrB_Info GB_reduce_to_scalar // twork = reduce_to_scalar (A)
 int64_t asize = A->type->size ;
 int64_t anz = NNZ (A) ;
- const void *Ax = A->x ;
+ const int64_t *restrict Ai = A->i ;
 int64_t zsize = ztype->size ;
@@ -78,7 +92,7 @@ GrB_Info GB_reduce_to_scalar // twork = reduce_to_scalar (A)
 // twork = 0
 memcpy (twork, reduce->identity, zsize) ;
- // reduce all the entries in the matrix
+ // reduce all the entries in the matrix, but skip any zombies
 if (A->type == ztype)
 {
@@ -99,13 +113,25 @@ GrB_Info GB_reduce_to_scalar // twork = reduce_to_scalar (A)
 // define the worker for the switch factory
 #define WORKER(type) \
 { \
- const type *ax = (type *) Ax ; \
+ const type *restrict Ax = (type *) A->x ; \
 type s ; \
 memcpy (&s, twork, zsize) ; \
- for (int64_t p = 0 ; p < anz ; p++) \
+ if (A->nzombies == 0) \
 { \
- /* s "+=" ax [p] */ \
- ADD (s, ax [p]) ; \
+ for (int64_t p = 0 ; p < anz ; p++) \
+ { \
+ /* s += A(i,j) */ \
+ ASSERT (IS_NOT_ZOMBIE (Ai [p])) ; \
+ ADD (s, Ax [p]) ; \
+ } \
+ } \
+ else \
+ { \
+ for (int64_t p = 0 ; p < anz ; p++) \
+ { \
+ /* s += A(i,j) if the entry is not a zombie */ \
+ if (IS_NOT_ZOMBIE (Ai [p])) ADD (s, Ax [p]) ; \
+ } \
 } \
 memcpy (twork, &s, zsize) ; \
 done = true ; \
@@ -137,19 +163,36 @@ GrB_Info GB_reduce_to_scalar // twork = reduce_to_scalar (A)
 if (!done)
 {
-
- GB_binary_function freduce = reduce->op->function ;
- // the switch factory didn't handle this case
- for (int64_t p = 0 ; p < anz ; p++)
+ GB_binary_function freduce = reduce->op->function ;
+ const void *Ax = A->x ;
+ if (A->nzombies == 0)
 {
- // wwork = twork
- memcpy (wwork, twork, zsize) ;
- // twork = wwork "+" Ax [p]
- freduce (twork, wwork, Ax +(p*asize)) ;
+ for (int64_t p = 0 ; p < anz ; p++)
+ {
+ // twork += A(i,j)
+ ASSERT (IS_NOT_ZOMBIE (Ai [p])) ;
+ // wwork = twork
+ memcpy (wwork, twork, zsize) ;
+ // twork = wwork + Ax [p]
+ freduce (twork, wwork, Ax +(p*asize)) ;
+ }
+ }
+ else
+ {
+ for (int64_t p = 0 ; p < anz ; p++)
+ {
+ // twork += A(i,j) if not a zombie
+ if (IS_NOT_ZOMBIE (Ai [p]))
+ {
+ // wwork = twork
+ memcpy (wwork, twork, zsize) ;
+ // twork = wwork + Ax
+(p*asize)) ; + } + } } } - } else { @@ -162,14 +205,36 @@ GrB_Info GB_reduce_to_scalar // twork = reduce_to_scalar (A) GB_cast_function cast_A_to_Z = GB_cast_factory (ztype->code, A->type->code) ; - for (int64_t p = 0 ; p < anz ; p++) + const void *Ax = A->x ; + if (A->nzombies == 0) { - // awork = (ztype) Ax [p] - cast_A_to_Z (awork, Ax +(p*asize), zsize) ; - // wwork = twork - memcpy (wwork, twork, zsize) ; - // twork = wwork "+" awork - freduce (twork, wwork, awork) ; + for (int64_t p = 0 ; p < anz ; p++) + { + // twork += (ztype) A(i,j) + ASSERT (IS_NOT_ZOMBIE (Ai [p])) ; + // awork = (ztype) Ax [p] + cast_A_to_Z (awork, Ax +(p*asize), zsize) ; + // wwork = twork + memcpy (wwork, twork, zsize) ; + // twork = wwork + awork + freduce (twork, wwork, awork) ; + } + } + else + { + for (int64_t p = 0 ; p < anz ; p++) + { + // twork += (ztype) A(i,j) if not a zombie + if (IS_NOT_ZOMBIE (Ai [p])) + { + // awork = (ztype) Ax [p] + cast_A_to_Z (awork, Ax +(p*asize), zsize) ; + // wwork = twork + memcpy (wwork, twork, zsize) ; + // twork = wwork + awork + freduce (twork, wwork, awork) ; + } + } } } diff --git a/GraphBLAS/Source/GB_semiring_builtin.c b/GraphBLAS/Source/GB_semiring_builtin.c index 2c29ff9596..7260ae34fc 100644 --- a/GraphBLAS/Source/GB_semiring_builtin.c +++ b/GraphBLAS/Source/GB_semiring_builtin.c @@ -42,18 +42,12 @@ bool GB_semiring_builtin // true if semiring is builtin // or not this function handles the semiring as hard-coded. Now return for // cases this function does not handle. - // This function requires A and B must have the same built-in type, - // and they must match the types x,y for fmult. - // If this condition doesn't hold, punt to the generic C=A*B: + // This function requires A and B to have the same built-in type, and they + // must match the types x,y for fmult. If this condition doesn't hold, + // punt to the generic C=A*B: if ((A->type != (flipxy ? mult->ytype : mult->xtype)) || (B->type != (flipxy ? mult->xtype : mult->ytype)) || (A->type != B->type) || (A->type->code == GB_UDT_code) - // This function handles only built-in operators. If either is - // user-defined, punt to generic C=A*B. Note that the switch factory - // below would also return false, but doing this here makes it - // explicit, just for better understanding of what this function does. - // It also means ASSERT's can be added, which serve to both explain the - // logic to the reader, and as a test when debugging is enabled: || (*add_opcode == GB_USER_opcode) || (*mult_opcode == GB_USER_opcode)) { return (false) ; diff --git a/GraphBLAS/Source/GB_subassign.c b/GraphBLAS/Source/GB_subassign.c index 5a2fd089f8..37737b358d 100644 --- a/GraphBLAS/Source/GB_subassign.c +++ b/GraphBLAS/Source/GB_subassign.c @@ -10,7 +10,7 @@ // submatrix assignment: C(I,J) = accum (C(I,J),A) // compare/contrast this function with GB_assign. -// All GrB_*_subassign operations rely on this function. +// All GxB_*_subassign operations rely on this function. 
// With scalar_expansion = false, this method does the work for the standard // GxB_*subassign operations (GxB_Matrix_subassign, GxB_Vector_subassign, @@ -154,6 +154,8 @@ GrB_Info GB_subassign // C(I,J) = accum (C(I,J),A) // apply pending updates to A and Mask //-------------------------------------------------------------------------- + // if C == Mask or C == A, pending updates are applied to C as well + // delete any lingering zombies and assemble any pending tuples // but only in A and Mask, not C APPLY_PENDING_UPDATES (Mask) ; @@ -196,11 +198,41 @@ GrB_Info GB_subassign // C(I,J) = accum (C(I,J),A) } //-------------------------------------------------------------------------- - // C(I,J) = A or accum (C(I,J),A), no transpose of A + // Z = C + //-------------------------------------------------------------------------- + + // GB_subassign_kernel modifies C efficiently in place, but it can only do + // so if C is not aliased with A2 or Mask. If C is aliased a copy must + // be made. GB_subassign_kernel operates on the copy, Z, which is then + // transplanted back into C when done. This is costly, and can have + // performance implications, but it is the only reasonable method. If C is + // aliased, then the assignment is a large one and copying the whole matrix + // will not add much time. + + GrB_Matrix Z ; + bool aliased = (C == A2 || C == Mask) ; + if (aliased) + { + // Z = duplicate of C + info = GB_Matrix_dup (&Z, C) ; + if (info != GrB_SUCCESS) + { + GB_MATRIX_FREE (&AT) ; + return (info) ; + } + } + else + { + // GB_subassign_kernel can safely operate on C in place + Z = C ; + } + + //-------------------------------------------------------------------------- + // Z(I,J) = A or accum (Z(I,J),A), no transpose of A //-------------------------------------------------------------------------- info = GB_subassign_kernel ( - C, C_replace, // C matrix and its descriptor + Z, C_replace, // Z matrix and its descriptor Mask, Mask_comp, // Mask matrix and its descriptor accum, // for accum (C(I,J),A) A2, // A matrix, NULL for scalar expansion @@ -211,6 +243,35 @@ GrB_Info GB_subassign // C(I,J) = accum (C(I,J),A) scalar_code) ; // type code of scalar to expand GB_MATRIX_FREE (&AT) ; + + //-------------------------------------------------------------------------- + // C = Z + //-------------------------------------------------------------------------- + + if (aliased) + { + if (info == GrB_SUCCESS) + { + // zombies can be transplanted into C but pending tuples cannot + if (Z->npending > 0) + { + // assemble all pending tuples, and delete all zombies too + info = GB_wait (Z) ; + } + } + if (info == GrB_SUCCESS) + { + // transplants the content of Z into C and frees Z + return (GB_Matrix_transplant (C, C->type, &Z)) ; + } + else + { + // Z needs to be freed if C is aliased but info != GrB_SUCCESS. + // (out of memory, or inputs invalid). C remains unchanged. 
+ GB_MATRIX_FREE (&Z) ; + } + } + return (info) ; // pass info directly from GB_subassign_kernel } diff --git a/GraphBLAS/Source/GB_subassign_kernel.c b/GraphBLAS/Source/GB_subassign_kernel.c index 346125261f..ebb08c9178 100644 --- a/GraphBLAS/Source/GB_subassign_kernel.c +++ b/GraphBLAS/Source/GB_subassign_kernel.c @@ -42,7 +42,7 @@ GrB_Info GB_subassign_kernel // C(I,J) = A or accum (C (I,J), A) const GrB_Matrix Mask, // optional mask for C(I,J), unused if NULL const bool Mask_comp, // Mask descriptor const GrB_BinaryOp accum, // optional accum for Z=accum(C(I,J),A) - const GrB_Matrix A, // input matrix + const GrB_Matrix A, // input matrix (NULL for scalar expansion) const GrB_Index *I, // row indices const GrB_Index ni, // number of row indices const GrB_Index *J, // column indices @@ -53,6 +53,14 @@ GrB_Info GB_subassign_kernel // C(I,J) = A or accum (C (I,J), A) ) { + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + // this function operates on C in place and this cannot be aliased with + // A or Mask + ASSERT (C != Mask && C != A) ; + //-------------------------------------------------------------------------- // check empty Mask conditions //-------------------------------------------------------------------------- @@ -308,7 +316,7 @@ GrB_Info GB_subassign_kernel // C(I,J) = A or accum (C (I,J), A) #define C_LOOKUP \ int64_t pC = Sx [pS] ; \ int64_t iC = Ci [pC] ; \ - bool is_zombie = IS_FLIPPED (iC) ; \ + bool is_zombie = IS_ZOMBIE (iC) ; \ if (is_zombie) iC = FLIP (iC) ; //-------------------------------------------------------------------------- @@ -1189,7 +1197,6 @@ GrB_Info GB_subassign_kernel // C(I,J) = A or accum (C (I,J), A) // ----[C - 0] replace // action: ( delete ): becomes a zombie C->nzombies++ ; - // printf ("C_repl delete , nzombies %lld\n", C->nzombies) ; Ci [pC] = FLIP (iC) ; } } @@ -1355,7 +1362,7 @@ GrB_Info GB_subassign_kernel // C(I,J) = A or accum (C (I,J), A) { int64_t iC = Maski [pM] ; int64_t pC = Cp [jC] + iC ; - bool is_zombie = IS_FLIPPED (Ci [pC]) ; + bool is_zombie = IS_ZOMBIE (Ci [pC]) ; ASSERT (UNFLIP (Ci [pC]) == iC) ; //------------------------------------------------------ diff --git a/GraphBLAS/Source/GB_transpose_ix.c b/GraphBLAS/Source/GB_transpose_ix.c index 575629d1b4..f193252ec2 100644 --- a/GraphBLAS/Source/GB_transpose_ix.c +++ b/GraphBLAS/Source/GB_transpose_ix.c @@ -57,7 +57,7 @@ void GB_transpose_ix // transpose the pattern and values of a matrix int64_t anz = Ap [n] ; for (int64_t p = 0 ; p < anz ; p++) { - ASSERT (!IS_FLIPPED (Ai [p])) ; + ASSERT (IS_NOT_ZOMBIE (Ai [p])) ; } #endif diff --git a/GraphBLAS/Source/GB_transpose_op.c b/GraphBLAS/Source/GB_transpose_op.c index fda175c9a1..f2a3a552a3 100644 --- a/GraphBLAS/Source/GB_transpose_op.c +++ b/GraphBLAS/Source/GB_transpose_op.c @@ -64,7 +64,7 @@ void GB_transpose_op // transpose and apply an operator to a matrix int64_t anz = Ap [n] ; for (int64_t p = 0 ; p < anz ; p++) { - ASSERT (!IS_FLIPPED (Ai [p])) ; + ASSERT (IS_NOT_ZOMBIE (Ai [p])) ; } #endif diff --git a/GraphBLAS/Source/GB_transpose_pattern.c b/GraphBLAS/Source/GB_transpose_pattern.c index c664b82f4e..053f8a30f0 100644 --- a/GraphBLAS/Source/GB_transpose_pattern.c +++ b/GraphBLAS/Source/GB_transpose_pattern.c @@ -48,7 +48,7 @@ void GB_transpose_pattern // transpose the pattern of a matrix int64_t anz = Ap [n] ; for (int64_t p = 0 ; p < anz ; p++) { - ASSERT (!IS_FLIPPED (Ai [p])) ; + ASSERT 
(IS_NOT_ZOMBIE (Ai [p])) ; } #endif diff --git a/GraphBLAS/Source/GB_wait.c b/GraphBLAS/Source/GB_wait.c index 7825819a18..7a7e5e1ec3 100644 --- a/GraphBLAS/Source/GB_wait.c +++ b/GraphBLAS/Source/GB_wait.c @@ -85,8 +85,7 @@ GrB_Info GB_wait // finish all pending computations for ( ; p < Ap [j+1] ; p++) { int64_t i = Ai [p] ; - bool is_zombie = IS_FLIPPED (i) ; - if (!is_zombie) + if (IS_NOT_ZOMBIE (i)) { // A(i,j) is not a zombie, keep it Ai [anz] = i ; @@ -163,7 +162,7 @@ GrB_Info GB_wait // finish all pending computations // if NULL operator: an implicit 'SECOND' function will be used // otherwise use A->operator_pending info = GB_builder (T, &(A->ipending), &(A->jpending), - A->sorted_pending, A->xpending, A->npending, + A->sorted_pending, A->xpending, A->npending, A->max_npending, A->operator_pending, A->type->code) ; //-------------------------------------------------------------------------- diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_bool.c new file mode 100644 index 0000000000..93f4cf9a39 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_bool +// A'*B function: GB_AdotB__eq_eq_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp32.c new file mode 100644 index 0000000000..f57b60ab78 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_fp32 +// A'*B function: GB_AdotB__eq_eq_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp64.c new file mode 100644 index 0000000000..440cce2eb4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_fp64 +// A'*B function: GB_AdotB__eq_eq_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int16.c new file mode 100644 index 0000000000..a441065712 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_int16 +// A'*B function: GB_AdotB__eq_eq_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
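+ // Summary of the unmasked branch below: the pattern of C has already
+ // been computed by GB_AxB_symbolic, so only the values are needed.
+ // For each column j, w is first set to the EQ monoid identity (true)
+ // over the pattern of C(:,j), each product of A(:,k) and B(k,j) is
+ // folded in with w [i] = (w [i] == t), and w is finally gathered back
+ // into Cx over the same pattern.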
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int32.c new file mode 100644 index 0000000000..c19301e6b2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_int32 +// A'*B function: GB_AdotB__eq_eq_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
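+ // Summary of the masked branch below: each column C(:,j) is built in
+ // three steps.  (1) scatter: Mask(:,j) is expanded into the Flag
+ // workspace, lazily, the first time a contributing A(:,k) is found.
+ // (2) accumulate: for each B(k,j) and each A(i,k), the product is
+ // folded into w [i] only if Flag [i] is nonzero; the first hit sets
+ // Flag [i] = -1 to record that C(i,j) exists.  (3) gather: Mask(:,j)
+ // is walked again; Flag [i] < 0 yields a live entry C(i,j) = w [i],
+ // and Flag is reset to zero for the next column.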
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int64.c new file mode 100644 index 0000000000..64db5e1bcb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_int64 +// A'*B function: GB_AdotB__eq_eq_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
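+ // Summary of the GB_AdotB__* dot-product kernel further below: each
+ // cij = A(:,i)'*B(:,j) is computed with a strategy chosen from the
+ // entry counts ainz and bjnz relative to nrows: if both columns are
+ // dense, a single loop over all nrows entries; if only one is dense,
+ // iterate the sparse column and index the dense one directly; if one
+ // column has more than 32 times the entries of the other, merge but
+ // skip runs in the larger column with GB_BINARY_TRIM_SEARCH; otherwise
+ // use a plain two-pointer merge via the MERGE macro.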
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int8.c new file mode 100644 index 0000000000..5318b4a239 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_int8 +// A'*B function: GB_AdotB__eq_eq_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
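+ // Background note (not emitted by the axb*.m scripts): a zombie is an
+ // entry kept in the pattern of C but marked as deleted by storing its
+ // flipped index, Ci [p] = FLIP (i).  When the masked phase creates
+ // zombies, GB_queue_insert places C in the global queue so that the
+ // pending deletions can be assembled later (see GB_wait).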
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint16.c new file mode 100644 index 0000000000..f78c068da0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_uint16 +// A'*B function: GB_AdotB__eq_eq_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
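+    // Illustrative note (not produced by the axb*.m scripts): the EQ "add"
+    // monoid used by this kernel is logical XNOR with identity true.
+    // Folding each comparison t = (aik == bkj) into cij via cij = (cij == t)
+    // leaves cij true exactly when an even number of the comparisons were
+    // false.  A hypothetical trace with comparison results {true, false,
+    // true}:
+    //
+    //      bool cij = true ;           // identity of the EQ monoid
+    //      cij = (cij == true)  ;      // still true
+    //      cij = (cij == false) ;      // flips to false
+    //      cij = (cij == true)  ;      // stays false (one mismatch total)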
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint32.c new file mode 100644 index 0000000000..876dcbcb9a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_uint32 +// A'*B function: GB_AdotB__eq_eq_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
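+    // Reading aid (not template output): this is a Gustavson-style saxpy
+    // method.  For each column j, the terms A(i,k)*B(k,j) are accumulated
+    // into the dense workspace w, the Flag workspace records which rows the
+    // Mask permits and which have already been touched, and a final gather
+    // pass copies the accumulated values from w into C(:,j), in the order
+    // given by the Mask (or by the symbolic pattern when Mask is NULL).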
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint64.c new file mode 100644 index 0000000000..d030c9de73 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_uint64 +// A'*B function: GB_AdotB__eq_eq_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
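+    // Note on the WITH_ZOMBIES variant below: when the Mask pattern is
+    // copied directly into C->p, any Mask entry that A*B does not produce is
+    // kept as a zombie: its row index is stored as FLIP (i) and C->nzombies
+    // is incremented, so the entry can be deleted later when pending work on
+    // C is completed (hence the GB_queue_insert (C) at the end of that
+    // branch).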
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint8.c new file mode 100644 index 0000000000..93c34572a5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_eq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_eq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_eq_uint8 +// A'*B function: GB_AdotB__eq_eq_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
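+    // Reading aid (not template output): in the masked branch below, work is
+    // pruned at three levels before any multiply is performed: column j is
+    // skipped if Mask(:,j) is empty, column A(:,k) is skipped if it is empty
+    // or if its row range [alo,ahi] cannot overlap the Mask's row range
+    // [mlo,mhi], and the scatter of Mask(:,j) into Flag is deferred until at
+    // least one A(:,k) survives those tests (tracked by 'marked').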
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_first_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_first_bool.c new file mode 100644 index 0000000000..5744c328ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_first_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_first_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_first_bool +// A'*B function: GB_AdotB__eq_first_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
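+    // Illustrative note (not template output): this kernel pairs the FIRST
+    // multiply, t = aik, with the EQ monoid, so only A's values enter the
+    // product; bkj is read but contributes only B's pattern.  As a result,
+    // cij ends up true exactly when an even number of the participating
+    // A(i,k) values are false.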
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_bool.c new file mode 100644 index 0000000000..1923b42ce3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_bool +// A'*B function: GB_AdotB__eq_ge_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
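A minimal standalone sketch of the EQ_GE_BOOL semiring that the generated kernel above hard-codes: the multiply is t = (aik >= bkj), the add is cij = (cij == t), and the monoid identity is true. The helper name eq_ge_dot and the dense boolean columns are assumptions made only for illustration; the generated code operates on sparse columns with the Mask, Flag workspace, and zombie handling shown above.

    /* illustration only: scalar semantics of the EQ_GE_BOOL semiring */
    #include <stdbool.h>
    #include <stdio.h>

    /* cij = EQ-monoid reduction over k of (A(k,i) >= B(k,j)) */
    static bool eq_ge_dot (const bool *Acol, const bool *Bcol, int n)
    {
        bool cij = true ;                       /* identity of the EQ monoid */
        for (int k = 0 ; k < n ; k++)
        {
            bool t = (Acol [k] >= Bcol [k]) ;   /* multiply: t = (aik >= bkj) */
            cij = (cij == t) ;                  /* add: cij = (cij == t) */
        }
        return (cij) ;
    }

    int main (void)
    {
        /* one column of A and one column of B, assumed dense for clarity */
        bool Acol [3] = { true, false, true } ;
        bool Bcol [3] = { true, true,  false } ;
        printf ("c(i,j) = %d\n", eq_ge_dot (Acol, Bcol, 3)) ;
        return (0) ;
    }

Compiled on its own, this prints the single reduced value for one column pair; the same scalar update is what the masked saxpy-style loop (w [i] = (w [i] == t)) and the dot-product MERGE macro apply entry by entry.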
diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp32.c new file mode 100644 index 0000000000..19fa854a42 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_fp32 +// A'*B function: GB_AdotB__eq_ge_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp64.c new file mode 100644 index 0000000000..5b8b8f13ab --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_fp64 +// A'*B function: GB_AdotB__eq_ge_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int16.c new file mode 100644 index 0000000000..ea6c4ce249 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_int16 +// A'*B function: GB_AdotB__eq_ge_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int32.c new file mode 100644 index 0000000000..d3fc3869c8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_int32 +// A'*B function: GB_AdotB__eq_ge_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
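+ // w is used below as a dense boolean accumulator, indexed by row i.  For
+ // this EQ_GE semiring each update applies
+ //     t = (aik >= bkj) ;          "multiply"
+ //     w [i] = (w [i] == t) ;      "add" (EQ monoid, identity true)
+ // and C(:,j) is gathered from w once column j is complete.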
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int64.c new file mode 100644 index 0000000000..0959d934c4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_int64 +// A'*B function: GB_AdotB__eq_ge_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
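+ // In the masked branch below, Flag holds the scattered pattern of
+ // Mask(:,j): zero means the entry is not in the mask, positive means it is
+ // in the mask but not yet seen in A*B, and -1 marks entries of C(:,j) that
+ // have been computed.  Under WITH_ZOMBIES, mask entries never reached by
+ // A*B are kept as zombies (row index FLIP(i)); otherwise they are dropped.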
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int8.c new file mode 100644 index 0000000000..8811291d11 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_int8 +// A'*B function: GB_AdotB__eq_ge_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
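+ // In the unmasked branch below, the pattern of C has already been computed
+ // by GB_AxB_symbolic, so Cp and Ci are read-only here: w is first set to
+ // true (the EQ identity) over the pattern of C(:,j), then updated by the
+ // multiply-add and gathered back into Cx.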
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint16.c new file mode 100644 index 0000000000..9ca255bf04 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_uint16 +// A'*B function: GB_AdotB__eq_ge_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
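+ // In the GB_AdotB kernel later in this file, the MERGE macro performs one
+ // multiply-add of a dot product: t = (aki >= bkj), then cij = t on the
+ // first matching entry (setting cij_exists) and cij = (cij == t) after
+ // that.  C(i,j) is appended to C only if cij_exists is true.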
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint32.c new file mode 100644 index 0000000000..da00e1c337 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_uint32 +// A'*B function: GB_AdotB__eq_ge_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
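+ // The dot products in GB_AdotB below select among several cases: if A(:,i)
+ // or B(:,j) is dense, the other vector's row indices index it directly; if
+ // one vector has more than 32 times the entries of the other,
+ // GB_BINARY_TRIM_SEARCH skips runs of entries that cannot match; otherwise
+ // a plain two-way merge of the two sorted index lists is used.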
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint64.c new file mode 100644 index 0000000000..f926122b4a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_uint64 +// A'*B function: GB_AdotB__eq_ge_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
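+
+    // Worked example (for exposition; not part of the generated template):
+    // with the EQ_GE_UINT64 semiring each term is t = (aik >= bkj) and the
+    // terms are folded with cij = (cij == t), starting from the identity
+    // true.  For instance, if A(i,k1) = 4, B(k1,j) = 5 and A(i,k2) = 7,
+    // B(k2,j) = 2, the terms are t1 = false and t2 = true, so
+    // C(i,j) = ((true == false) == true) = false.
+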
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint8.c new file mode 100644 index 0000000000..c233dc71be --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ge_uint8 +// A'*B function: GB_AdotB__eq_ge_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
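+
+    // Outline of this function: if a Mask is present, the pattern of C is
+    // taken to be the pattern of the Mask, and (when compiled WITH_ZOMBIES)
+    // any Mask entry whose C(i,j) does not appear in A*B becomes a zombie
+    // (flipped row index, identity value).  Otherwise the pattern of C has
+    // already been computed by GB_AxB_symbolic and only the numerical values
+    // are computed here.  In both cases each column C(:,j) is accumulated
+    // into the dense workspace w and then gathered back into Cx.
+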
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_bool.c new file mode 100644 index 0000000000..9366d97712 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_bool +// A'*B function: GB_AdotB__eq_gt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
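+
+    // Note on the Flag workspace used in the masked branch below:
+    // scatter_mask marks Flag [i] nonzero for each entry Mask (i,j) that
+    // casts to true, so the inner loop can skip positions outside the mask.
+    // Flag [i] is set to -1 the first time C(i,j) receives a value, and is
+    // reset to 0 when C(:,j) is gathered, leaving the workspace clear for
+    // the next column.
+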
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp32.c new file mode 100644 index 0000000000..115cf478ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_fp32 +// A'*B function: GB_AdotB__eq_gt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
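+
+    // Note on GB_AdotB__eq_gt_fp32 below: each dot product
+    // cij = A(:,i)'*B(:,j) picks one of five strategies depending on
+    // sparsity: both vectors dense, only A(:,i) dense, only B(:,j) dense,
+    // one vector more than 32 times sparser than the other (in which case
+    // the longer index list is advanced with GB_BINARY_TRIM_SEARCH), or a
+    // plain two-pointer merge when the two are comparably sparse.
+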
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp64.c new file mode 100644 index 0000000000..a72839c3e8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_fp64 +// A'*B function: GB_AdotB__eq_gt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
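+
+    // Note on zombies (used when a Mask is present and WITH_ZOMBIES is
+    // defined): C->p is copied directly from the Mask, entries of the Mask
+    // that do not appear in A*B are stored with a flipped row index
+    // (FLIP (i)) and counted in C->nzombies, and GB_queue_insert places C
+    // in the queue of matrices that still have pending work (the zombies).
+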
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int16.c new file mode 100644 index 0000000000..eca56530c7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_int16 +// A'*B function: GB_AdotB__eq_gt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int32.c new file mode 100644 index 0000000000..e00fbe5a20 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_int32 +// A'*B function: GB_AdotB__eq_gt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int64.c new file mode 100644 index 0000000000..ac7e2851a7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_int64 +// A'*B function: GB_AdotB__eq_gt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int8.c new file mode 100644 index 0000000000..03891752b3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_int8 +// A'*B function: GB_AdotB__eq_gt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint16.c new file mode 100644 index 0000000000..3d3786c62d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_uint16 +// A'*B function: GB_AdotB__eq_gt_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
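+    // w is the dense gather/scatter workspace for the outer-product method
+    // below: for each column j, the products A(i,k)*B(k,j) for each entry
+    // B(k,j) are accumulated into w [i] with the monoid, and w is then
+    // gathered into C(:,j) using either the Mask pattern or the pattern
+    // precomputed by GB_AxB_symbolic.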
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint32.c new file mode 100644 index 0000000000..ba4d997eb5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_uint32 +// A'*B function: GB_AdotB__eq_gt_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
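+    // In the masked case below, Mask(:,j) is scattered into the Flag
+    // workspace so that only entries with Mask(i,j) == 1 are computed.
+    // With WITH_ZOMBIES, C keeps the full Mask pattern: Mask entries not
+    // produced by A*B become zombies (row index stored as FLIP(i) and
+    // counted in C->nzombies), and C is placed in the queue so the zombies
+    // can be deleted later.  Without WITH_ZOMBIES, C(:,j) is gathered
+    // compactly and Cp [j] records the start of each column.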
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint64.c new file mode 100644 index 0000000000..f6304260c7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_uint64 +// A'*B function: GB_AdotB__eq_gt_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
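+    // Semiring note: the multiply is t = (aik > bkj) and the additive
+    // monoid is EQ (logical XNOR) with identity true.  Reducing with
+    // cij = (cij == t) starting from the identity means the result is true
+    // exactly when an even number of the products t are false.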
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint8.c new file mode 100644 index 0000000000..a62c6c3ff0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_gt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_gt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_gt_uint8 +// A'*B function: GB_AdotB__eq_gt_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
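+    // In the unmasked case below, the pattern of C has already been
+    // computed by GB_AxB_symbolic, so this numeric phase only fills in the
+    // values: w is cleared to the monoid identity (true) over the pattern
+    // of C(:,j), updated for each entry B(k,j), and then gathered back
+    // into Cx.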
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_land_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_land_bool.c new file mode 100644 index 0000000000..3c8271d9d2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_land_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_land_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_land_bool +// A'*B function: GB_AdotB__eq_land_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
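+    // Semiring note: in this boolean kernel the multiply is a logical AND,
+    // t = ((aik != 0) && (bkj != 0)), and the additive monoid is EQ (XNOR)
+    // with identity true, so cij reports whether an even number of the
+    // AND terms are false.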
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } 
+ } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_bool.c new file mode 100644 index 0000000000..f1a7052514 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_bool +// A'*B function: GB_AdotB__eq_le_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
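+
+    // Semiring arithmetic in this kernel: each "multiply" is the comparison
+    // t = (aik <= bkj), and each "add" folds the result in via
+    // cij = (cij == t), an XNOR-style reduction whose identity is true.
+    // Two contributions t1 and t2 to the same C(i,j) therefore reduce to
+    // ((true == t1) == t2), which equals (t1 == t2).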
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_fp32.c new file mode 100644 index 0000000000..55cc010fa9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_fp32 +// A'*B function: GB_AdotB__eq_le_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
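+
+    // When a Mask is present, the pattern of C is taken from the Mask.  With
+    // WITH_ZOMBIES defined, Maskp is copied into C->p up front and every Mask
+    // entry receives a slot in C: entries produced by A*B are stored live
+    // (Ci [p] = i), while Mask entries with no corresponding product become
+    // zombies (Ci [p] = FLIP (i), and C->nzombies is incremented), after
+    // which C is placed in the queue via GB_queue_insert.  Without
+    // WITH_ZOMBIES, only live entries are appended and cnz/Cp record the
+    // compacted pattern.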
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_fp64.c new file mode 100644 index 0000000000..5911419028 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_fp64 +// A'*B function: GB_AdotB__eq_le_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
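+
+    // When no Mask is passed, the pattern of C (C->p and C->i) has already
+    // been computed by GB_AxB_symbolic.  For each column j the workspace w is
+    // first set to the identity (true) over the pattern of C(:,j), then the
+    // saxpy-style updates w [i] = (w [i] == (aik <= bkj)) are applied for
+    // each pair B(k,j), A(i,k), and finally w is gathered back into Cx.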
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_int16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int16.c new file mode 100644 index 0000000000..80c47b6f80 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_int16 +// A'*B function: GB_AdotB__eq_le_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
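+
+    // In the masked phase below, the Flag workspace encodes the state of each
+    // row i for the current column j: Flag [i] == 0 means Mask(i,j) is not
+    // present (or is false) and the entry is skipped; Flag [i] > 0 means the
+    // Mask entry is present but C(i,j) has not yet been computed, so the
+    // first product initializes w [i]; Flag [i] < 0 means C(i,j) already
+    // holds a value in w and is updated.  The gather loop resets Flag to 0 so
+    // the workspace is clean for the next column.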
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_int32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int32.c new file mode 100644 index 0000000000..dc3794ea37 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_int32 +// A'*B function: GB_AdotB__eq_le_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
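+
+    // The companion dot-product kernel GB_AdotB__eq_le_int32 later in this
+    // file computes C=A'*B one entry at a time.  For each C(i,j) it picks a
+    // strategy from the sparsity of A(:,i) and B(:,j): fully dense vectors
+    // use a direct loop, a dense/sparse pair indexes the dense vector by the
+    // sparse one's row indices, and when one vector has more than 32 times
+    // the entries of the other the merge advances through the denser vector
+    // with GB_BINARY_TRIM_SEARCH; otherwise a standard two-pointer merge is
+    // used, with the MERGE macro applying the semiring.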
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_int64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int64.c new file mode 100644 index 0000000000..f89cd297fc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_int64 +// A'*B function: GB_AdotB__eq_le_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
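When a Mask is present, the kernel below uses a gather/scatter pattern: Mask(:,j) is scattered into the Flag workspace, products are accumulated into the dense workspace w only at rows where Flag is set, and C(:,j) is then gathered back in Mask order. A minimal standalone sketch of that pattern for one column, with illustrative data and names only (in the real kernel, scatter_mask and the Flag conventions come from GB_AxB_methods.h):

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define N 6                             // number of rows (illustrative)

    int main (void)
    {
        // one column j of the Mask, row indices only
        int64_t Maski [3] = { 1, 3, 4 } ;
        int64_t masknz = 3 ;

        int8_t Flag [N] ;                   // 0: not in Mask, 1: in Mask, -1: entry seen
        bool   w    [N] ;                   // dense accumulator for C(:,j)
        memset (Flag, 0, sizeof (Flag)) ;

        // scatter Mask(:,j) into Flag (the role of scatter_mask above)
        for (int64_t p = 0 ; p < masknz ; p++) Flag [Maski [p]] = 1 ;

        // accumulate a few illustrative contributions t at rows i, exactly as
        // the inner loop does with t = (aik <= bkj)
        struct { int64_t i ; bool t ; } updates [4] =
            { {1, true}, {3, false}, {1, false}, {5, true} } ;
        for (int k = 0 ; k < 4 ; k++)
        {
            int64_t i = updates [k].i ;
            if (Flag [i] == 0) continue ;   // row not in the Mask: skip
            if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = updates [k].t ; }
            else              { w [i] = (w [i] == updates [k].t) ; }
        }

        // gather C(:,j) in Mask order; rows never touched stay out of C
        for (int64_t p = 0 ; p < masknz ; p++)
        {
            int64_t i = Maski [p] ;
            if (Flag [i] < 0) printf ("C(%ld,j) = %d\n", (long) i, w [i]) ;
            else              printf ("C(%ld,j) not present\n", (long) i) ;
            Flag [i] = 0 ;                  // restore Flag for the next column
        }
        return 0 ;
    }

Resetting Flag only at the positions of Mask(:,j) is what lets the same workspace be reused across all columns without a full clear.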
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_int8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int8.c new file mode 100644 index 0000000000..595d67b8d6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_int8 +// A'*B function: GB_AdotB__eq_le_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
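In the WITH_ZOMBIES variant below, C takes the pattern of the Mask up front, and any Mask entry that receives no contribution from A*B is kept as a "zombie": its row index is stored flipped (Ci [p] = FLIP (i)) and counted in C->nzombies, to be pruned when pending work on the matrix is finished. The sketch below assumes one plausible self-inverse encoding purely for illustration; the actual FLIP macro is defined in GB.h:

    #include <stdbool.h>
    #include <stdint.h>
    #include <assert.h>

    // illustrative zombie encoding (an assumption, not copied from GB.h):
    // map a valid index i >= 0 to a negative value so that zombies are easy
    // to recognize, and so that applying the map twice restores i
    static inline int64_t flip (int64_t i) { return (-i) - 2 ; }
    static inline bool is_zombie (int64_t i) { return (i < 0) ; }

    int main (void)
    {
        for (int64_t i = 0 ; i < 1000 ; i++)
        {
            assert (is_zombie (flip (i))) ;     // flipped indices are negative
            assert (flip (flip (i)) == i) ;     // the flip is an involution
        }
        return 0 ;
    }

Because the encoding is self-inverse, the kernel can mark and later un-mark entries in place without any extra storage beyond the zombie count.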
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint16.c new file mode 100644 index 0000000000..ac8e65676e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_uint16 +// A'*B function: GB_AdotB__eq_le_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
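The GB_AdotB__* routine in this file computes C=A'*B one entry at a time: cij is a dot product of two sorted sparse vectors, A(:,i) and B(:,j), and the MERGE macro applies the semiring only at row indices present in both. A standalone sketch of that two-pointer merge, with illustrative uint16_t data:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
        int64_t  Ai [4] = { 0, 2, 5, 7 } ;  uint16_t Ax [4] = { 3, 8, 1, 6 } ;
        int64_t  Bi [3] = { 2, 5, 9 } ;     uint16_t Bx [3] = { 9, 0, 4 } ;
        int64_t pa = 0, pa_end = 4, pb = 0, pb_end = 3 ;
        bool cij = false, cij_exists = false ;
        while (pa < pa_end && pb < pb_end)
        {
            if      (Ai [pa] < Bi [pb]) pa++ ;          // A entry has no partner
            else if (Bi [pb] < Ai [pa]) pb++ ;          // B entry has no partner
            else
            {
                bool t = (Ax [pa++] <= Bx [pb++]) ;     // "multiply"
                cij = cij_exists ? (cij == t) : t ;     // "add" (EQ monoid)
                cij_exists = true ;
            }
        }
        if (cij_exists) printf ("C(i,j) = %d\n", cij) ; // rows 2 and 5 match here
        else            printf ("C(i,j) not present\n") ;
        return 0 ;
    }

If no row index is shared, cij_exists stays false and the entry is simply not appended to C, which is why the dot-product kernels need no mask scatter or zombie bookkeeping.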
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint32.c new file mode 100644 index 0000000000..f0d2b75289 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_uint32 +// A'*B function: GB_AdotB__eq_le_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
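For very unbalanced dot products (one vector with more than 32 times the entries of the other), the A'*B kernel below stops scanning the denser vector linearly and instead jumps ahead with GB_BINARY_TRIM_SEARCH to the next index of the sparser vector. The sketch below models that step as an ordinary lower-bound binary search; treating the macro this way is an assumption made only for illustration (its actual definition lives elsewhere in the library):

    #include <stdint.h>
    #include <stdio.h>

    // first position p in [lo,hi) with list [p] >= target
    static int64_t lower_bound (const int64_t *list, int64_t lo, int64_t hi,
                                int64_t target)
    {
        while (lo < hi)
        {
            int64_t mid = lo + (hi - lo) / 2 ;
            if (list [mid] < target) lo = mid + 1 ; else hi = mid ;
        }
        return lo ;
    }

    int main (void)
    {
        // A(:,i) is much denser than B(:,j): skip ahead in Ai, do not scan it
        int64_t Ai [10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } ;
        int64_t Bi [2]  = { 4, 9 } ;
        int64_t pa = 0, pb = 0 ;
        while (pa < 10 && pb < 2)
        {
            if (Ai [pa] < Bi [pb])
            {
                // discard all Ai entries below Bi [pb] in one search
                pa = lower_bound (Ai, pa + 1, 10, Bi [pb]) ;
            }
            else if (Bi [pb] < Ai [pa]) pb++ ;
            else { printf ("match at row %ld\n", (long) Ai [pa]) ; pa++ ; pb++ ; }
        }
        return 0 ;
    }

The 32x threshold trades the O(log n) cost of each search against the cost of simply stepping through the denser list; below the threshold the plain merge wins.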
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint64.c new file mode 100644 index 0000000000..b0a7b29bc5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_uint64 +// A'*B function: GB_AdotB__eq_le_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint8.c new file mode 100644 index 0000000000..e2e4b57a1f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_le_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_le_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_le_uint8 +// A'*B function: GB_AdotB__eq_le_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lor_bool.c new file mode 100644 index 0000000000..7f8c10acb9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lor_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lor_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lor_bool +// A'*B function: GB_AdotB__eq_lor_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } 
+ } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_bool.c new file mode 100644 index 0000000000..d58bbd647e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_bool +// A'*B function: GB_AdotB__eq_lt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
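[Editorial note, not part of the patch] Every kernel in this group of generated files applies the same scalar recurrence spelled out in their header comments: the multiply operator produces t from a pair of entries, and the EQ monoid (identity true) folds it into the result with cij = (cij == t). The short standalone sketch below illustrates that recurrence for the LT multiply on float, matching the dense-dense branch of the GB_AdotB__eq_lt_* kernels; the array names, the eq_lt_dot helper, and the main driver are illustrative only and do not appear in GraphBLAS.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// One C(i,j) of the EQ_LT semiring over two dense length-n vectors:
// identity is true, multiply is t = (a < b), add is cij = (cij == t).
static bool eq_lt_dot (const float *a, const float *b, int64_t n)
{
    bool cij = true ;                   // identity of the EQ monoid
    for (int64_t k = 0 ; k < n ; k++)
    {
        bool t = (a [k] < b [k]) ;      // multiply: t = (aki < bkj)
        cij = (cij == t) ;              // add:      cij = (cij == t)
    }
    return cij ;
}

int main (void)
{
    const float a [3] = { 1.0f, 2.5f, 3.0f } ;
    const float b [3] = { 2.0f, 2.5f, 4.0f } ;
    // terms: (1.0 < 2.0) = 1, (2.5 < 2.5) = 0, (3.0 < 4.0) = 1 ;
    // folding with EQ: ((true == 1) == 0) == 1 gives false
    printf ("cij = %d\n", eq_lt_dot (a, b, 3)) ;
    return 0 ;
}

The sparse branches in the generated kernels compute the same quantity; they differ only in how the entry pairs (aki, bkj) are enumerated (dense indexing, one-sided lookup, or the MERGE-based merge of two sorted index lists).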
diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp32.c new file mode 100644 index 0000000000..a823c061d0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_fp32 +// A'*B function: GB_AdotB__eq_lt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp64.c new file mode 100644 index 0000000000..a7099a2eb3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_fp64 +// A'*B function: GB_AdotB__eq_lt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
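The comment block above fully specifies the scalar arithmetic of this kernel: the "multiply" is the LT comparison and the "add" is the EQ (XNOR) monoid with identity true. A minimal standalone sketch of that reduction over two dense double vectors (plain C, hypothetical helper name, not part of the generated file):

#include <stdbool.h>
#include <stddef.h>

// Reduce x and y with the EQ_LT_FP64 semiring: t = (x[k] < y[k]) is the
// multiply, and cij = (cij == t) is the EQ (XNOR) monoid with identity true.
static bool eq_lt_fp64_reduce (const double *x, const double *y, size_t n)
{
    bool cij = true ;                   // monoid identity
    for (size_t k = 0 ; k < n ; k++)
    {
        bool t = (x [k] < y [k]) ;      // multiply
        cij = (cij == t) ;              // add (XNOR)
    }
    return cij ;
}

Starting from the identity true means a column with no contributing entries leaves cij unchanged, which is exactly the property the "Identity" line above records.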
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int16.c new file mode 100644 index 0000000000..6d82e08895 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_int16 +// A'*B function: GB_AdotB__eq_lt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ +
void GB_AxB__eq_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
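As in the fp32 and fp64 kernels above, the unmasked branch of this generated worker is a sparse saxpy over columns: clear the dense workspace w on the pattern of C(:,j) with the monoid identity, accumulate each column A(:,k) selected by an entry B(k,j), then gather w back into C(:,j). A condensed standalone sketch with plain CSC arrays (hypothetical names, no GB_thread_local workspace):

#include <stdbool.h>
#include <stdint.h>

// Sketch of the saxpy-style outer-product kernel (unmasked case).  Ap,Ai,Ax /
// Bp,Bi,Bx / Cp,Ci,Cx are CSC arrays; w is a dense workspace of size nrows(C).
static void eq_lt_int16_saxpy (int64_t n,
    const int64_t *Ap, const int64_t *Ai, const int16_t *Ax,
    const int64_t *Bp, const int64_t *Bi, const int16_t *Bx,
    const int64_t *Cp, const int64_t *Ci, bool *Cx, bool *w)
{
    for (int64_t j = 0 ; j < n ; j++)
    {
        // clear w on the pattern of C(:,j), using the monoid identity (true)
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = true ;
        // accumulate the contribution of A(:,k) for each entry B(k,j)
        for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
        {
            int64_t k = Bi [p] ;
            int16_t bkj = Bx [p] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                bool t = (Ax [pa] < bkj) ;          // multiply
                w [Ai [pa]] = (w [Ai [pa]] == t) ;  // EQ (XNOR) monoid
            }
        }
        // gather w back into the values of C(:,j)
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;
    }
}

The sketch assumes the pattern Cp,Ci has already been computed (by the symbolic phase in the real kernel) and only fills in the values.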
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int32.c new file mode 100644 index 0000000000..0f7f4b1fe3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_int32 +// A'*B function: GB_AdotB__eq_lt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ +
void GB_AxB__eq_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
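The GB_AdotB__* dot-product kernels above choose their merge strategy from the relative sparsity of A(:,i) and B(:,j): a plain two-pointer merge when the counts are comparable, and, when one pattern has more than 32 times the entries of the other, a walk over the sparser pattern with a trimmed binary search (GB_BINARY_TRIM_SEARCH) into the denser one. A standalone sketch of such a trimmed search, under the assumption that it advances pleft to the first position whose row index is at least the target (the macro in GB_AxB_methods.h is the authoritative definition):

#include <stdint.h>

// Hypothetical helper: return the first position p in [pleft, pright) with
// Index [p] >= target, so the caller can discard everything before it.
static int64_t trim_search (int64_t target, const int64_t *Index,
    int64_t pleft, int64_t pright)
{
    while (pleft < pright)
    {
        int64_t pmiddle = (pleft + pright) / 2 ;
        if (Index [pmiddle] < target)
        {
            pleft = pmiddle + 1 ;   // discard [pleft .. pmiddle]
        }
        else
        {
            pright = pmiddle ;      // keep [pleft .. pmiddle]
        }
    }
    return (pleft) ;
}

With that bracketing, each entry of the sparser vector skips over a run of unmatched entries in the denser one in logarithmic rather than linear time, which is what the ainz > 32 * bjnz (and symmetric) crossover tests are buying.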
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int64.c new file mode 100644 index 0000000000..cfb6679e3a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_int64 +// A'*B function: GB_AdotB__eq_lt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ +
void GB_AxB__eq_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
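In the masked (WITH_ZOMBIES) branch of these kernels, C->p is copied straight from the Mask, so every Mask entry gets a slot in C even when A*B contributes nothing to it. Such slots are kept as zombies: the row index is stored as FLIP (i), C->nzombies is incremented, and GB_queue_insert places C on the queue of matrices with pending work so the zombies can be pruned later. A tiny sketch of an involutive encoding with the properties FLIP relies on (the real macro is defined in GB.h; the constant below is only an assumption for illustration):

#include <assert.h>
#include <stdint.h>

// Hypothetical zombie encoding: maps every valid index i >= 0 to a negative
// value and is its own inverse, so a flipped index can always be recovered.
#define FLIP_SKETCH(i) (-(i)-2)

void flip_demo (void)
{
    for (int64_t i = 0 ; i < 4 ; i++)
    {
        int64_t z = FLIP_SKETCH (i) ;       // zombie row index, always negative
        assert (z < 0) ;
        assert (FLIP_SKETCH (z) == i) ;     // un-flipping recovers the index
    }
}

Keeping zombies in place lets the kernel reuse the Mask pattern verbatim instead of compacting C(:,j) on the fly; the compaction is deferred until the matrix is finalized.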
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int8.c new file mode 100644 index 0000000000..1152a9b710 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_int8 +// A'*B function: GB_AdotB__eq_lt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ +
void GB_AxB__eq_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
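All of these hard-coded workers sit behind the #ifndef GBCOMPACT guard above: with GBCOMPACT defined they are compiled out and the library falls back to its generic worker for this semiring. User code never calls GB_AxB__eq_lt_int8 directly; it reaches this kernel through GrB_mxm with the corresponding built-in semiring. A usage sketch, assuming the usual SuiteSparse:GraphBLAS name GxB_EQ_LT_INT8 for that semiring and omitting error handling:

#include "GraphBLAS.h"

// C<Mask> = A*B over int8 with multiply (a < b) and the EQ (XNOR) monoid.
// GxB_EQ_LT_INT8 is assumed to be the matching built-in semiring name.
GrB_Info eq_lt_mxm_demo (GrB_Matrix C, const GrB_Matrix Mask,
    const GrB_Matrix A, const GrB_Matrix B)
{
    return (GrB_mxm (C, Mask, NULL, GxB_EQ_LT_INT8, A, B, NULL)) ;
}

Whether the outer-product (GB_AxB__*) or dot-product (GB_AdotB__*) variant is used for a given call is decided inside the library, not by the caller.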
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint16.c new file mode 100644 index 0000000000..aee185b47a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_uint16 +// A'*B function: GB_AdotB__eq_lt_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<M>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
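    // A minimal worked illustration of the semiring described above, using
    // hypothetical values (it is not part of the generated kernel): the
    // multiply is LT and the monoid is EQ with identity true, exactly the fold
    // applied to w [i] and cij in the code that follows.
    //
    //      bool cij = true ;                  // identity of the EQ monoid
    //      uint16_t a [3] = { 1, 5, 7 } ;     // A(:,i), hypothetical values
    //      uint16_t b [3] = { 4, 2, 9 } ;     // B(:,j), hypothetical values
    //      for (int k = 0 ; k < 3 ; k++)
    //      {
    //          bool t = (a [k] < b [k]) ;     // multiply: t = (aik < bkj)
    //          cij = (cij == t) ;             // add:      cij = (cij == t)
    //      }
    //      // k=0: t=1, cij=1 ;  k=1: t=0, cij=0 ;  k=2: t=1, cij=0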
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint32.c new file mode 100644 index 0000000000..887b796972 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_uint32 +// A'*B function: GB_AdotB__eq_lt_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<M>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
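    // Summary of the two code paths below (they are identical in every
    // generated type variant of this kernel): when a Mask is present, its
    // pattern becomes the pattern of C and the thread-local Flag workspace is
    // used to scatter Mask(:,j) so that A*B(:,j) is accumulated only where the
    // Mask permits; when no Mask is given, the pattern of C has already been
    // computed by GB_AxB_symbolic and w is used as a plain gather/scatter
    // workspace over the entries of C(:,j).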
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint64.c new file mode 100644 index 0000000000..9ea10354c3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_uint64 +// A'*B function: GB_AdotB__eq_lt_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<M>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
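    // Note on the WITH_ZOMBIES path below: positions that appear in the Mask
    // but receive no entry from A*B are kept in C as zombies, stored with a
    // flipped row index (FLIP (i)) and counted in C->nzombies, and the matrix
    // is then placed on the queue via GB_queue_insert; the zombies are
    // presumably pruned later, when pending work on C is finished (GB_wait).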
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint8.c new file mode 100644 index 0000000000..bc20306b57 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lt_uint8 +// A'*B function: GB_AdotB__eq_lt_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C<M>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
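    // Note on the column-level pruning below: column j is skipped outright if
    // Mask(:,j) has no entries (the empty test), A(:,k) is skipped if it is
    // empty or if its row range [alo,ahi] cannot overlap the Mask's row range
    // [mlo,mhi], and Mask(:,j) is scattered into Flag lazily (scatter_mask),
    // only once the first contributing A(:,k) is encountered.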
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_lxor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_lxor_bool.c new file mode 100644 index 0000000000..ed6e4ce497 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_lxor_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_lxor_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_lxor_bool +// A'*B function: GB_AdotB__eq_lxor_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
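+    // As an assumed illustration of this EQ_LXOR_BOOL semiring (the values
+    // are made up, not taken from any matrix): the multiply gives
+    // t1 = (true != false) = true and t2 = (true != true) = false, and
+    // folding both into the EQ monoid starting from its identity (true)
+    // gives cij = ((true == t1) == t2) = (true == false) = false.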
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } 
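+        // Note on the cases above: when one of A(:,i) or B(:,j) has more
+        // than 32 times the entries of the other, a trimmed binary search
+        // (GB_BINARY_TRIM_SEARCH) skips over runs of indices in the denser
+        // pattern; otherwise a plain two-pointer merge of the sorted row
+        // indices is used, with fully dense columns handled as special cases.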
+ } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp32.c new file mode 100644 index 0000000000..3530b1f668 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_fp32 +// A'*B function: GB_AdotB__eq_ne_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
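+    // Two cases follow: if a Mask is present, its pattern is scattered into
+    // the Flag workspace and taken as the pattern of C (when WITH_ZOMBIES is
+    // defined, Mask entries not produced by A*B are kept as zombies, with row
+    // index stored as FLIP (i), to be pruned later); otherwise the pattern of
+    // C has already been computed by GB_AxB_symbolic and only the values are
+    // computed here.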
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp64.c new file mode 100644 index 0000000000..b547026cb7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_fp64 +// A'*B function: GB_AdotB__eq_ne_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
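+    // In the unmasked case below, the "clear w" loop sets w to the identity
+    // of the EQ monoid (true) over the pattern of C(:,j), so that each update
+    // w [i] = (w [i] == t) starts from the identity value.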
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int16.c new file mode 100644 index 0000000000..dc5fdf5638 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_int16 +// A'*B function: GB_AdotB__eq_ne_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
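+    // In the masked case below, Flag [i] encodes the state of C(i,j) for the
+    // current column j: 0 means Mask(i,j) is zero or not present, > 0 means
+    // the Mask allows the entry but no value has been computed yet, and < 0
+    // means w [i] already holds a partial value for C(i,j).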
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int32.c new file mode 100644 index 0000000000..bc6c920591 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_int32 +// A'*B function: GB_AdotB__eq_ne_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
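+    // Each column C(:,j) is computed by a sequence of saxpy-style updates:
+    // for every entry B(k,j), the column A(:,k) is scattered into w, with
+    // t = (aik != bkj) folded into the EQ monoid, and w is then gathered
+    // back into C(:,j).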
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int64.c new file mode 100644 index 0000000000..69a4cf07e9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_int64 +// A'*B function: GB_AdotB__eq_ne_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
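// A minimal, self-contained sketch of the EQ_NE_INT64 semiring itself, using
// made-up data; it is not part of the generated kernel.  The reduction starts
// at the identity 'true', maps each pair to t = (aik != bkj), and folds with
// cij = (cij == t): each equal pair flips cij, each unequal pair leaves it
// unchanged.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    int64_t a [4] = { 1, 2, 3, 4 } ;        // A(:,i), taken as dense
    int64_t b [4] = { 1, 5, 3, 7 } ;        // B(:,j), taken as dense
    bool cij = true ;                       // identity of the EQ (LXNOR) monoid
    for (int k = 0 ; k < 4 ; k++)
    {
        bool t = (a [k] != b [k]) ;         // multiply: NE
        cij = (cij == t) ;                  // add: EQ
    }
    // the pairs are equal at k = 0 and k = 2; those two flips cancel, so the
    // result is the identity again
    printf ("cij = %d\n", (int) cij) ;      // prints 1
    return (0) ;
}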
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int8.c new file mode 100644 index 0000000000..29cc761bd0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_int8 +// A'*B function: GB_AdotB__eq_ne_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
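// A minimal, self-contained sketch of the saxpy-style (Gustavson) column
// update used by the unmasked branch of this kernel: for each entry B(k,j),
// A(:,k) is scattered into a row-indexed workspace w over the precomputed
// pattern of C(:,j), and w is then gathered back into Cx.  The tiny CSC
// arrays below are made up for the example; only column j = 0 is computed.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // A: 3-by-2 CSC; column 0 holds rows {0,2}, column 1 holds row {1}
    int64_t Ap [3] = { 0, 2, 3 } ;
    int64_t Ai [3] = { 0, 2, 1 } ;
    int8_t  Ax [3] = { 1, 2, 5 } ;
    // B: 2-by-1 CSC; column 0 holds rows {0,1}
    int64_t Bp [2] = { 0, 2 } ;
    int64_t Bi [2] = { 0, 1 } ;
    int8_t  Bx [2] = { 1, 5 } ;
    // pattern of C(:,0), as a prior symbolic phase would produce: rows {0,1,2}
    int64_t Cp [2] = { 0, 3 } ;
    int64_t Ci [3] = { 0, 1, 2 } ;
    bool    Cx [3] ;
    bool    w  [3] ;                            // workspace, one slot per row

    int64_t j = 0 ;
    // clear w to the identity (true) at the positions in C(:,j)'s pattern
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = true ;
    // saxpy: for each B(k,j), fold A(:,k) "times" B(k,j) into w
    for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    {
        int64_t k = Bi [p] ;
        int8_t bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            bool t = (Ax [pa] != bkj) ;             // multiply: NE
            w [Ai [pa]] = (w [Ai [pa]] == t) ;      // add: EQ
        }
    }
    // gather the column back into Cx
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;
    for (int64_t p = 0 ; p < 3 ; p++)
    {
        printf ("C(%lld,0) = %d\n", (long long) Ci [p], (int) Cx [p]) ;
    }
    return (0) ;                                    // prints 0, 0, 1
}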
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint16.c new file mode 100644 index 0000000000..f7864540ea --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_uint16 +// A'*B function: GB_AdotB__eq_ne_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
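// A minimal sketch of the zombie bookkeeping used by the masked branch of
// this kernel: C takes the Mask's pattern, rows of Mask(:,j) that received a
// value from A*B are stored as live entries, and the remaining masked rows
// become "zombies" whose row index is encoded with FLIP so a later phase can
// prune them.  The flip encoding used here (-(i) - 2, so that row 0 remains
// representable) and the per-position live/value arrays are assumptions made
// for this sketch, not necessarily the library's definitions; the kernel
// itself keeps the values in a row-indexed workspace w.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static inline int64_t flip (int64_t i)     { return (-i - 2) ; }  // hypothetical
static inline bool    is_zombie (int64_t i) { return (i < 0) ; }

int main (void)
{
    // Mask(:,j) has entries in rows {1,3,4}; suppose A*B produced a value
    // only for rows 1 and 4 (live [] and wval [] are made up)
    int64_t Maski [3] = { 1, 3, 4 } ;
    bool    live  [3] = { true, false, true } ;
    bool    wval  [3] = { true, false, false } ;
    int64_t Ci [3] ; bool Cx [3] ; int64_t nzombies = 0 ;
    for (int p = 0 ; p < 3 ; p++)
    {
        if (live [p])
        {
            Ci [p] = Maski [p] ; Cx [p] = wval [p] ;    // live entry
        }
        else
        {
            Ci [p] = flip (Maski [p]) ;                 // zombie: flipped index
            Cx [p] = true ;
            nzombies++ ;
        }
    }
    for (int p = 0 ; p < 3 ; p++)
    {
        printf ("Ci[%d] = %lld%s\n", p, (long long) Ci [p],
            is_zombie (Ci [p]) ? " (zombie)" : "") ;
    }
    printf ("nzombies = %lld\n", (long long) nzombies) ;
    return (0) ;
}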
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint32.c new file mode 100644 index 0000000000..e098e12471 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_uint32 +// A'*B function: GB_AdotB__eq_ne_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
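// A minimal, self-contained sketch of the two-pointer merge used by the dot
// product kernel when A(:,i) and B(:,j) have similar sparsity: the sorted row
// indices are merged, and only matching rows are folded with the EQ_NE
// semiring, as the MERGE macro does.  The index/value arrays are made up.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // A(:,i) has rows {0,2,5}, B(:,j) has rows {2,3,5}
    int64_t  Ai [3] = { 0, 2, 5 } ;  uint32_t Ax [3] = { 7, 4, 9 } ;
    int64_t  Bi [3] = { 2, 3, 5 } ;  uint32_t Bx [3] = { 4, 1, 8 } ;
    int64_t pa = 0, pa_end = 3, pb = 0, pb_end = 3 ;
    bool cij = false, cij_exists = false ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if      (ia < ib) pa++ ;                    // A entry has no partner
        else if (ib < ia) pb++ ;                    // B entry has no partner
        else                                        // matching row index
        {
            bool t = (Ax [pa++] != Bx [pb++]) ;     // multiply: NE
            cij = cij_exists ? (cij == t) : t ;     // add: EQ (first hit sets cij)
            cij_exists = true ;
        }
    }
    if (cij_exists) printf ("cij = %d\n", (int) cij) ;  // rows 2 and 5 match; prints 0
    else            printf ("C(i,j) not present\n") ;
    return (0) ;
}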
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint64.c new file mode 100644 index 0000000000..5af3db8787 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_uint64 +// A'*B function: GB_AdotB__eq_ne_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
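// A minimal sketch of the skewed-sparsity case of the dot product: when one
// vector has far more entries than the other (the "> 32 *" tests), the merge
// skips ahead in the longer index list with a binary search rather than
// advancing one entry at a time.  first_at_least() is a helper written only
// for this sketch; the kernel uses its own GB_BINARY_TRIM_SEARCH macro, whose
// exact interface is not reproduced here.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// return the smallest p in [lo, hi) with X [p] >= target, or hi if none
static int64_t first_at_least (const int64_t *X, int64_t lo, int64_t hi,
    int64_t target)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (X [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return (lo) ;
}

int main (void)
{
    // A(:,i) is long, B(:,j) is short, so pa advances by searching
    int64_t  Ai [8] = { 0, 1, 2, 3, 5, 8, 9, 12 } ;
    uint64_t Ax [8] = { 1, 1, 1, 1, 6, 1, 1, 3 } ;
    int64_t  Bi [2] = { 5, 12 } ;
    uint64_t Bx [2] = { 6, 4 } ;
    int64_t pa = 0, pa_end = 8, pb = 0, pb_end = 2 ;
    bool cij = false, cij_exists = false ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib)
        {
            // discard all entries of A(:,i) with row index below ib
            pa = first_at_least (Ai, pa + 1, pa_end, ib) ;
        }
        else if (ib < ia)
        {
            pb++ ;
        }
        else
        {
            bool t = (Ax [pa++] != Bx [pb++]) ;     // multiply: NE
            cij = cij_exists ? (cij == t) : t ;     // add: EQ
            cij_exists = true ;
        }
    }
    printf ("cij_exists = %d, cij = %d\n", (int) cij_exists, (int) cij) ;
    return (0) ;                                    // prints 1, 0
}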
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint8.c new file mode 100644 index 0000000000..2f11961f68 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_ne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_ne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_ne_uint8 +// A'*B function: GB_AdotB__eq_ne_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
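// A standalone sketch, not part of the generated kernel: the EQ_NE semiring
// pairs the "not equal" multiply with the EQ (XNOR) add, starting from the
// monoid identity true, so a dot product is true exactly when an even number
// of the multiplied pairs differ.  Starting at the identity is equivalent to
// taking the first product directly, which is what the kernels do via
// cij_exists.  The helper name eq_ne_reduce below is illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool eq_ne_reduce (const uint8_t *a, const uint8_t *b, int64_t n)
{
    bool cij = true ;                   // identity of the EQ (XNOR) monoid
    for (int64_t k = 0 ; k < n ; k++)
    {
        bool t = (a [k] != b [k]) ;     // multiply: t = (aik != bkj)
        cij = (cij == t) ;              // add: cij = (cij == t)
    }
    return (cij) ;
}

int main (void)
{
    uint8_t a [4] = { 1, 2, 3, 4 } ;
    uint8_t b [4] = { 1, 9, 3, 7 } ;    // differs from a in two positions
    printf ("%d\n", eq_ne_reduce (a, b, 4)) ;   // even # of mismatches: prints 1
    return (0) ;
}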
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__eq_second_bool.c b/GraphBLAS/Source/Generated/GB_AxB__eq_second_bool.c new file mode 100644 index 0000000000..a1a4795dc0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__eq_second_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__eq_second_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__eq_second_bool +// A'*B function: GB_AdotB__eq_second_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij == true) does not change cij) +// Multiply: t = (bkj) +// Add: cij = (cij == t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__eq_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
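// A small, hedged illustration, not part of the generated kernel: the
// SECOND multiply returns its second operand, so t = bkj and the stored
// value of A(i,k) never affects the result; only A's pattern decides which
// B(k,j) values reach the EQ (XNOR) accumulator.  That is why the template
// code below still loads aik but never uses it.  The helper name
// second_bool is illustrative only.

#include <stdbool.h>
#include <stdio.h>

static bool second_bool (bool aik, bool bkj)
{
    (void) aik ;                        // SECOND ignores its first operand
    return (bkj) ;
}

int main (void)
{
    bool cij = true ;                           // EQ monoid identity
    bool b [3] = { true, true, false } ;        // B(k,j) values that meet A(:,i)
    for (int k = 0 ; k < 3 ; k++)
    {
        bool t = second_bool (false, b [k]) ;   // aik's value is irrelevant
        cij = (cij == t) ;                      // add: cij = (cij == t)
    }
    printf ("%d\n", cij) ;                      // prints 0 for this input
    return (0) ;
}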
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] == t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = bkj ; + w [i] = (w [i] == t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__eq_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij == t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij == t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij == t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij == t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__land_eq_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_bool.c new file mode 100644 index 0000000000..c080fa9712 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_bool +// A'*B function: GB_AdotB__land_eq_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
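// A standalone sketch, not part of the generated kernel: with the LAND_EQ
// semiring the multiply tests aik == bkj and the add is logical AND with
// identity true, so a dot product is true only if every pair of entries
// that meets is equal.  The helper name land_eq_reduce is illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool land_eq_reduce (const bool *a, const bool *b, int64_t n)
{
    bool cij = true ;                   // identity of the LAND monoid
    for (int64_t k = 0 ; k < n ; k++)
    {
        bool t = (a [k] == b [k]) ;     // multiply: t = (aik == bkj)
        cij = (cij && t) ;              // add: cij = (cij && t)
    }
    return (cij) ;
}

int main (void)
{
    bool a [3] = { true, false, true } ;
    bool b [3] = { true, false, true } ;
    printf ("%d\n", land_eq_reduce (a, b, 3)) ;     // identical: prints 1
    b [1] = true ;
    printf ("%d\n", land_eq_reduce (a, b, 3)) ;     // one mismatch: prints 0
    return (0) ;
}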
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
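// A simplified, hedged sketch, not part of the generated kernel: in the
// dot-product method above, when one column has more than 32 times the
// entries of the other, the kernel advances through the denser index list
// by binary search (GB_BINARY_TRIM_SEARCH) instead of one entry at a time.
// The trim_search helper below is a plain binary search standing in for
// that macro, and intersect_count merely counts matches where the real
// kernels accumulate a semiring product; both names are illustrative only,
// and this sketch always searches the first list rather than choosing the
// denser side.

#include <inttypes.h>
#include <stdio.h>

// first position p in [lo,hi) with idx [p] >= target
static int64_t trim_search (const int64_t *idx, int64_t lo, int64_t hi,
    int64_t target)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (idx [mid] < target) { lo = mid + 1 ; } else { hi = mid ; }
    }
    return (lo) ;
}

// count common indices of two sorted lists, skipping ahead in the first
// list by binary search
static int64_t intersect_count (const int64_t *ai, int64_t anz,
    const int64_t *bi, int64_t bnz)
{
    int64_t pa = 0, pb = 0, matches = 0 ;
    while (pa < anz && pb < bnz)
    {
        if (ai [pa] < bi [pb])
        {
            pa = trim_search (ai, pa + 1, anz, bi [pb]) ;
        }
        else if (bi [pb] < ai [pa])
        {
            pb++ ;
        }
        else
        {
            matches++ ; pa++ ; pb++ ;
        }
    }
    return (matches) ;
}

int main (void)
{
    int64_t ai [8] = { 0, 2, 3, 5, 7, 9, 11, 13 } ;
    int64_t bi [2] = { 3, 9 } ;
    printf ("%" PRId64 "\n", intersect_count (ai, 8, bi, 2)) ;  // prints 2
    return (0) ;
}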
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_fp32.c new file mode 100644 index 0000000000..5bf469eb50 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_fp32 +// A'*B function: GB_AdotB__land_eq_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
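// A hedged sketch, not part of the generated kernel, of the unmasked
// outer-product method used below: the pattern of C(:,j) is already known
// from GB_AxB_symbolic, so the kernel (1) sets w [i] to the monoid identity
// for each i in that pattern, (2) scatters one column A(:,k) combined with
// B(k,j) into the dense workspace w for every entry B(k,j), and (3) gathers
// w back into Cx in pattern order.  scatter_column below mirrors step (2)
// for this LAND_EQ_FP32 semiring; the name is illustrative only.

#include <stdbool.h>
#include <stdint.h>

// w [i] "+=" A(i,k) * B(k,j) for each entry A(i,k) of one sparse column,
// where the add is LAND and the multiply is equality
static void scatter_column (bool *w, const int64_t *Ai, const float *Ax,
    int64_t pa_start, int64_t pa_end, float bkj)
{
    for (int64_t pa = pa_start ; pa < pa_end ; pa++)
    {
        int64_t i = Ai [pa] ;
        bool t = (Ax [pa] == bkj) ;     // multiply: t = (aik == bkj)
        w [i] = (w [i] && t) ;          // add: w [i] = (w [i] && t)
    }
}

int main (void)
{
    int64_t Ai [2] = { 1, 3 } ;                 // A(:,k) has entries in rows 1 and 3
    float   Ax [2] = { 2.0f, 5.0f } ;
    bool w [4] = { true, true, true, true } ;   // monoid identity
    scatter_column (w, Ai, Ax, 0, 2, 2.0f) ;    // B(k,j) = 2.0
    // w is now { true, true, true, false } : row 1 matched 2.0, row 3 did not
    return (0) ;
}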
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
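// A hedged sketch, not part of the generated kernel: in the masked
// outer-product branch above, C keeps the full pattern of the Mask, and
// Mask entries that receive no contribution from A*B are stored as
// "zombies": their row index is encoded through FLIP, C->nzombies is
// incremented, and GB_queue_insert puts C on a queue so the zombies can be
// pruned when the matrix is later finalized.  DEMO_FLIP below uses a
// hypothetical encoding; the real FLIP macro is defined elsewhere in the
// library and may differ.

#include <stdbool.h>
#include <stdint.h>

#define DEMO_FLIP(i)      (-(i) - 2)        // hypothetical: its own inverse
#define DEMO_IS_ZOMBIE(i) ((i) < 0)

// compact one column's row indices and values in place, dropping zombies,
// and return the number of live entries that remain
static int64_t prune_zombies (int64_t *Ci, bool *Cx, int64_t cnz)
{
    int64_t keep = 0 ;
    for (int64_t p = 0 ; p < cnz ; p++)
    {
        if (!DEMO_IS_ZOMBIE (Ci [p]))
        {
            Ci [keep] = Ci [p] ;
            Cx [keep] = Cx [p] ;
            keep++ ;
        }
    }
    return (keep) ;
}

int main (void)
{
    int64_t Ci [4] = { 0, DEMO_FLIP (1), 2, DEMO_FLIP (3) } ;
    bool    Cx [4] = { true, true, true, true } ;
    int64_t live = prune_zombies (Ci, Cx, 4) ;  // keeps rows 0 and 2
    return (live == 2 ? 0 : 1) ;
}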
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_fp64.c new file mode 100644 index 0000000000..918abfa98a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_fp64 +// A'*B function: GB_AdotB__land_eq_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int16.c new file mode 100644 index 0000000000..b684aa11e5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_int16 +// A'*B function: GB_AdotB__land_eq_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int32.c new file mode 100644 index 0000000000..b41ac705e2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_int32 +// A'*B function: GB_AdotB__land_eq_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int64.c new file mode 100644 index 0000000000..9685f7dced --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_int64 +// A'*B function: GB_AdotB__land_eq_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int8.c new file mode 100644 index 0000000000..0de694f603 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_int8 +// A'*B function: GB_AdotB__land_eq_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint16.c new file mode 100644 index 0000000000..2401a5e8f0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_uint16 +// A'*B function: GB_AdotB__land_eq_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
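+    // A minimal descriptive note on this generated kernel, derived from the
+    // semiring header above: the multiply is t = (aik == bkj) and the add is
+    // the LAND monoid, cij = (cij && t) with identity true, so each C(i,j) is
+    // true only if every pair A(i,k), B(k,j) that contributes to it is equal.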
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint32.c new file mode 100644 index 0000000000..c0f4477e91 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_uint32 +// A'*B function: GB_AdotB__land_eq_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint64.c new file mode 100644 index 0000000000..491166bb4f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_uint64 +// A'*B function: GB_AdotB__land_eq_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint8.c new file mode 100644 index 0000000000..125250700d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_eq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_eq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_eq_uint8 +// A'*B function: GB_AdotB__land_eq_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_first_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_first_bool.c new file mode 100644 index 0000000000..55972d8f38 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_first_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_first_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_first_bool +// A'*B function: GB_AdotB__land_first_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
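+    // A minimal descriptive note on this generated kernel, derived from the
+    // semiring header above: the multiply is FIRST, t = aik, and the add is
+    // the LAND monoid, cij = (cij && t) with identity true, so each C(i,j) is
+    // the logical AND of the A(i,k) values paired with present entries B(k,j).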
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__land_ge_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_bool.c new file mode 100644 index 0000000000..63ddb740ad --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_bool +// A'*B function: GB_AdotB__land_ge_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
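    /* [Editorial sketch, not part of the generated file] The generated kernels
       in this file rely on two thread-local workspaces prepared by the caller:
       Work (w), a dense accumulator of length C->nrows that holds the running
       LAND of products for the current column, and Flag, a cleared int8_t
       array into which the Mask pattern is scattered (Flag[i] > 0 means
       Mask(i,j) is present but C(i,j) has not yet been seen; Flag[i] < 0 means
       C(i,j) has already been started).  In the WITH_ZOMBIES variant, mask
       entries that never receive a product are kept as zombies: their row
       index is stored as FLIP(i), a negative value, and C->nzombies is
       incremented so the entries can be pruned later. */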
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
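The dot-product kernel above reduces with the LAND monoid over the GE multiplier, so for this semiring C(i,j) is true exactly when A(k,i) >= B(k,j) holds at every k where both columns have an entry, and C(i,j) is absent when the two patterns do not intersect. The standalone sketch below is only a rough illustration of that semantics with invented names (dot_land_ge, ai, ax, bi, bx); the real kernel additionally handles the Mask, the dense-column shortcuts, and the flip argument.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical sketch: cij = AND over the pattern intersection of (a >= b). */
    static bool dot_land_ge (const int64_t *ai, const bool *ax, int64_t anz,
                             const int64_t *bi, const bool *bx, int64_t bnz,
                             bool *exists)
    {
        bool cij = true ;           /* identity of the LAND monoid */
        *exists = false ;
        for (int64_t pa = 0, pb = 0 ; pa < anz && pb < bnz ; )
        {
            if      (ai [pa] < bi [pb]) pa++ ;
            else if (bi [pb] < ai [pa]) pb++ ;
            else
            {
                bool t = (ax [pa] >= bx [pb]) ;   /* GE multiplier */
                cij = (cij && t) ;                /* LAND monoid */
                *exists = true ;
                pa++ ; pb++ ;
            }
        }
        return cij ;
    }

    int main (void)
    {
        /* A(:,i) has entries at rows {0,2,3}; B(:,j) at rows {2,3,5} */
        int64_t ai [3] = {0, 2, 3} ; bool ax [3] = {true, true, false} ;
        int64_t bi [3] = {2, 3, 5} ; bool bx [3] = {true, false, true} ;
        bool exists, cij = dot_land_ge (ai, ax, 3, bi, bx, 3, &exists) ;
        printf ("exists=%d cij=%d\n", exists, cij) ;   /* prints exists=1 cij=1 */
        return 0 ;
    }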
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_fp32.c new file mode 100644 index 0000000000..9ed81520b2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_fp32 +// A'*B function: GB_AdotB__land_ge_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
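When one column has more than 32 times as many entries as the other, the merge above stops stepping through the denser column one entry at a time and instead jumps ahead with a binary "trim" search. The sketch below illustrates that idea; trim_search is a hypothetical stand-in for GB_BINARY_TRIM_SEARCH, whose exact interface in the library may differ.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper: first position p in list[lo..hi) with list[p] >= target. */
    static int64_t trim_search (int64_t target, const int64_t *list,
                                int64_t lo, int64_t hi)
    {
        while (lo < hi)
        {
            int64_t mid = lo + (hi - lo) / 2 ;
            if (list [mid] < target) lo = mid + 1 ; else hi = mid ;
        }
        return lo ;
    }

    int main (void)
    {
        /* indices of a dense column A(:,i) and a much sparser column B(:,j) */
        int64_t Ai [10] = {0, 1, 2, 3, 5, 8, 13, 21, 34, 55} ;
        int64_t Bi [2]  = {8, 55} ;
        int64_t pa = 0, matches = 0 ;
        for (int64_t pb = 0 ; pb < 2 ; pb++)
        {
            /* skip ahead in Ai instead of scanning it entry by entry */
            pa = trim_search (Bi [pb], Ai, pa, 10) ;
            if (pa < 10 && Ai [pa] == Bi [pb]) { matches++ ; pa++ ; }
        }
        printf ("matched %lld of 2 entries of B(:,j)\n", (long long) matches) ;
        return 0 ;
    }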
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_fp64.c new file mode 100644 index 0000000000..25344e26a3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_fp64 +// A'*B function: GB_AdotB__land_ge_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int16.c new file mode 100644 index 0000000000..2911a99d8e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_int16 +// A'*B function: GB_AdotB__land_ge_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int32.c new file mode 100644 index 0000000000..e4fd407c22 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_int32 +// A'*B function: GB_AdotB__land_ge_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
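+ // In scalar terms, this kernel computes, for each entry C(i,j) in its pattern,
+ // the AND over all k with A(i,k) and B(k,j) both present of (A(i,k) >= B(k,j)),
+ // starting from the monoid identity, true. A minimal self-contained sketch of
+ // that scalar reduction (a hypothetical helper, not part of this file) would be:
+ //
+ //     static inline bool land_ge_int32_pair (const int32_t *a, const int32_t *b, int64_t nz)
+ //     {
+ //         bool cij = true ;                       // identity of the LAND monoid
+ //         for (int64_t k = 0 ; k < nz ; k++)
+ //         {
+ //             cij = cij && (a [k] >= b [k]) ;     // multiply (>=), then add (&&)
+ //         }
+ //         return (cij) ;
+ //     }
+ //
+ // For example, a = {3, 7, 2} and b = {1, 7, 5} gives (3>=1) && (7>=7) && (2>=5),
+ // which is false.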
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int64.c new file mode 100644 index 0000000000..8aed18ed02 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_int64 +// A'*B function: GB_AdotB__land_ge_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
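+ // At the user level this worker sits behind GrB_mxm. A minimal sketch of a call
+ // that should dispatch to it, assuming the semiring is exposed under the built-in
+ // name GxB_LAND_GE_INT64 (the matrices and dimension n below are placeholders):
+ //
+ //     GrB_Matrix A, B, C ;
+ //     GrB_Matrix_new (&A, GrB_INT64, n, n) ;      // int64 inputs
+ //     GrB_Matrix_new (&B, GrB_INT64, n, n) ;
+ //     GrB_Matrix_new (&C, GrB_BOOL, n, n) ;       // boolean result
+ //     // ... build A and B, then:
+ //     GrB_mxm (C, NULL, NULL, GxB_LAND_GE_INT64, A, B, NULL) ;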
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int8.c new file mode 100644 index 0000000000..bd05e1e857 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_int8 +// A'*B function: GB_AdotB__land_ge_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
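+ // In the masked branch of these kernels, Flag holds a small per-row state for the
+ // current column j: zero means Mask(i,j) is not present (or casts to false), so the
+ // entry is skipped; a positive value means Mask(i,j) is true but C(i,j) has not yet
+ // been seen; -1 means w [i] already holds a partial result for C(i,j). scatter_mask
+ // sets the positive values once per column, on demand, and the gather phase resets
+ // every touched Flag entry to zero so the workspace can be reused for the next column.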
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint16.c new file mode 100644 index 0000000000..990783fc75 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_uint16 +// A'*B function: GB_AdotB__land_ge_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
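+ // Zombie bookkeeping, for the WITH_ZOMBIES variant of the masked case: the pattern
+ // of C is taken verbatim from the Mask (Maskp is copied into C->p), so a mask
+ // position that gets no contribution from A*B still occupies a slot in C. That slot
+ // is kept as a zombie: its value is set to the identity (true), its row index is
+ // stored flipped as FLIP (i), and C->nzombies is incremented; GB_queue_insert (C)
+ // then records that C has pending deletions to be pruned later. The #else variant
+ // compacts the live entries immediately and never creates zombies.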
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint32.c new file mode 100644 index 0000000000..ba68c276ac --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_uint32 +// A'*B function: GB_AdotB__land_ge_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
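+ // The companion dot-product worker, GB_AdotB__land_ge_uint32 below, computes each
+ // C(i,j) = A(:,i)'*B(:,j) and picks a merge strategy from the entry counts ainz of
+ // A(:,i) and bjnz of B(:,j): if both vectors are dense it loops over all nrows
+ // positions; if only one is dense it indexes into it directly while scanning the
+ // sparse one; if one has more than 32 times the entries of the other it steps
+ // through the sparser vector and skips ahead in the denser one with
+ // GB_BINARY_TRIM_SEARCH; otherwise it does a plain two-pointer merge of the two
+ // sorted index lists, with MERGE applying the multiply-add at each matching index.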
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint64.c new file mode 100644 index 0000000000..494e2e6899 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_uint64 +// A'*B function: GB_AdotB__land_ge_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
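+    // (w is per-thread scratch space taken from GB_thread_local; it holds the
+    // partial result for a single column C(:,j) at a time and is
+    // re-initialized for each column in the loops below)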
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint8.c new file mode 100644 index 0000000000..4f20fcc4ac --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ge_uint8 +// A'*B function: GB_AdotB__land_ge_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_bool.c new file mode 100644 index 0000000000..971bb08ea4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_bool +// A'*B function: GB_AdotB__land_gt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
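All of these generated kernels share one semiring template: the "multiply" step compares an entry of A with an entry of B, and the "add" step folds the comparisons together with a logical AND whose identity is true. As a rough, standalone illustration only (it is not part of this patch, and the function and array names below are made up), the following sketch shows the scalar computation that, for example, the dense-dense case of GB_AdotB__land_ge_uint32 performs for a single dot product cij = A(:,i)'*B(:,j):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    // Illustrative only: the LAND_GE "dot product" over two dense uint32
    // vectors.  cij starts at the LAND identity (true) and is AND-ed with
    // each comparison a [k] >= b [k].
    static bool land_ge_dot (const uint32_t *a, const uint32_t *b, int64_t n)
    {
        bool cij = true ;                   // identity of the LAND monoid
        for (int64_t k = 0 ; k < n ; k++)
        {
            bool t = (a [k] >= b [k]) ;     // "multiply": t = aik >= bkj
            cij = (cij && t) ;              // "add": cij = cij && t
        }
        return cij ;
    }

    int main (void)
    {
        uint32_t a [4] = { 5, 7, 9, 2 } ;
        uint32_t b [4] = { 1, 7, 3, 2 } ;
        // every a [k] >= b [k] holds, so the result printed is 1 (true)
        printf ("land_ge_dot: %d\n", land_ge_dot (a, b, 4)) ;
        return 0 ;
    }

The sparse cases in the kernels above compute the same fold, but only over row indices present in both A(:,i) and B(:,j), choosing a merge, a trimmed binary search, or a dense loop depending on the relative number of entries in the two vectors.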
diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_fp32.c new file mode 100644 index 0000000000..9261474c57 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_fp32 +// A'*B function: GB_AdotB__land_gt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_fp64.c new file mode 100644 index 0000000000..93db58da9e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_fp64 +// A'*B function: GB_AdotB__land_gt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int16.c new file mode 100644 index 0000000000..a4eecea2b7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_int16 +// A'*B function: GB_AdotB__land_gt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
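/*
 * Illustrative sketch only, not part of the generated file: how the
 * LAND_GT semiring named in the header above combines values.  The
 * multiply is t = (aik > bkj) and the add is cij = (cij && t), with
 * identity true, so reducing a pair of columns under this semiring asks
 * "is A(k,i) > B(k,j) at every k where both entries are present?".
 * The vector length and values below are invented for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// reduce two dense length-n vectors under the LAND_GT semiring
static bool land_gt_dot (const int16_t *a, const int16_t *b, int64_t n)
{
    bool cij = true ;                   // identity of the LAND monoid
    for (int64_t k = 0 ; k < n ; k++)
    {
        bool t = (a [k] > b [k]) ;      // multiply: t = (aik > bkj)
        cij = (cij && t) ;              // add: cij = (cij && t)
    }
    return (cij) ;
}

int main (void)
{
    int16_t a [4] = { 5, 7, 9, 3 } ;
    int16_t b [4] = { 1, 2, 8, 2 } ;
    printf ("cij = %d\n", land_gt_dot (a, b, 4)) ;   // prints 1 (true)
    return (0) ;
}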
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int32.c new file mode 100644 index 0000000000..59ac0d5fcd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_int32 +// A'*B function: GB_AdotB__land_gt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
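/*
 * Illustrative sketch only: the A'*B dot-product kernels above intersect
 * the row indices of A(:,i) and B(:,j) with a two-pointer merge, and when
 * one column is much denser than the other (the 32x test) they advance the
 * denser pointer with a binary search instead of one entry at a time.  The
 * helper names (lower_bound, intersect_count) and the data below are
 * invented for this sketch; the generated code uses its own
 * GB_BINARY_TRIM_SEARCH macro for the skip.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

// first position p in x[lo..hi-1] with x[p] >= target (x sorted ascending)
static int64_t lower_bound (const int64_t *x, int64_t lo, int64_t hi,
    int64_t target)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (x [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return (lo) ;
}

// count the indices common to two sorted index lists a[0..an-1], b[0..bn-1]
static int64_t intersect_count (const int64_t *a, int64_t an,
    const int64_t *b, int64_t bn)
{
    int64_t pa = 0, pb = 0, matches = 0 ;
    int skip_a = (an > 32 * bn) ;       // a is much denser: skip by search
    while (pa < an && pb < bn)
    {
        if (a [pa] < b [pb])
        {
            pa = skip_a ? lower_bound (a, pa + 1, an, b [pb]) : pa + 1 ;
        }
        else if (b [pb] < a [pa])
        {
            pb++ ;
        }
        else
        {
            matches++ ; pa++ ; pb++ ;   // common index: the MERGE step fires here
        }
    }
    return (matches) ;
}

int main (void)
{
    int64_t a [] = { 0, 1, 2, 4, 5, 7, 9 } ;
    int64_t b [] = { 2, 5, 8 } ;
    printf ("common indices: %" PRId64 "\n", intersect_count (a, 7, b, 3)) ;
    return (0) ;
}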
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int64.c new file mode 100644 index 0000000000..f09a34b8c6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_int64 +// A'*B function: GB_AdotB__land_gt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
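/*
 * Illustrative sketch only, with assumed details: how the masked kernels
 * above track which Mask entries became live entries of C and which became
 * zombies.  Flag[i] is 0 when Mask(i,j) is not set, 1 once Mask(:,j) has
 * been scattered, and -1 after C(i,j) has been computed into the workspace
 * w.  Under WITH_ZOMBIES, mask positions never touched by A*B keep their
 * slot in C but store a flipped ("zombie") row index and bump nzombies.
 * The FLIP encoding below is an assumption standing in for the library's
 * macro of the same name; the data is invented.
 */
#include <stdint.h>
#include <stdio.h>

#define FLIP(i) (-(i)-2)                // assumed encoding; FLIP(FLIP(i)) == i

int main (void)
{
    // pattern of Mask(:,j): rows 1, 3, 4 ; suppose A*B touched only row 3
    int64_t mask_rows [3] = { 1, 3, 4 } ;
    int8_t  flag [6]      = { 0 } ;     // one slot per row of C
    flag [1] = 1 ;                      // scattered from the mask, never updated
    flag [3] = -1 ;                     // computed by A*B; its value would live in w[3]
    flag [4] = 1 ;

    int64_t ci [3] ;
    int64_t nzombies = 0 ;
    for (int p = 0 ; p < 3 ; p++)
    {
        int64_t i = mask_rows [p] ;
        if (flag [i] < 0)
        {
            ci [p] = i ;                // live entry: keep the row index
        }
        else
        {
            ci [p] = FLIP (i) ;         // zombie: in the Mask but not in A*B
            nzombies++ ;
        }
        flag [i] = 0 ;                  // reset the workspace for the next column
    }
    printf ("C(:,j) row indices: %lld %lld %lld (zombies: %lld)\n",
        (long long) ci [0], (long long) ci [1], (long long) ci [2],
        (long long) nzombies) ;
    return (0) ;
}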
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int8.c new file mode 100644 index 0000000000..7c8f8cc6cd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_int8 +// A'*B function: GB_AdotB__land_gt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
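/*
 * Illustrative sketch only: the unmasked C=A*B path above works column by
 * column with a dense workspace w indexed by row.  The pattern of C(:,j)
 * is assumed known in advance (the library obtains it from
 * GB_AxB_symbolic); w is first set to the monoid identity (true) on that
 * pattern, each B(k,j) then ANDs (A(i,k) > B(k,j)) into w[i], and w is
 * finally gathered back into the compressed column.  Sizes and values
 * below are invented for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // one column j of C with pattern {0, 2, 3} in a matrix with 4 rows
    int64_t ci [3] = { 0, 2, 3 } ;
    bool    cx [3] ;
    bool    w  [4] ;

    // scatter the identity of the LAND monoid into the pattern of C(:,j)
    for (int p = 0 ; p < 3 ; p++) w [ci [p]] = true ;

    // one saxpy step: suppose B(k,j) = 4 and A(:,k) has entries in rows 0 and 3
    int64_t ai [2] = { 0, 3 } ;
    int8_t  ax [2] = { 9, 1 } ;
    int8_t  bkj    = 4 ;
    for (int p = 0 ; p < 2 ; p++)
    {
        int64_t i = ai [p] ;
        bool t = (ax [p] > bkj) ;       // multiply
        w [i] = (w [i] && t) ;          // monoid update
    }

    // gather w back into the column of C
    for (int p = 0 ; p < 3 ; p++) cx [p] = w [ci [p]] ;
    printf ("C(:,j) values: %d %d %d\n", cx [0], cx [1], cx [2]) ;  // 1 1 0
    return (0) ;
}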
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint16.c new file mode 100644 index 0000000000..60f1f86eba --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_uint16 +// A'*B function: GB_AdotB__land_gt_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
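The semiring comments above fully determine the scalar arithmetic this kernel performs: identity true, multiply GT, add LAND. The following is a minimal, standalone sketch of one dense dot product under that semiring, written only from those definitions; the function name land_gt_uint16_dot is illustrative and is not part of the generated kernel or the library API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// illustrative helper, not library code: fold n pairs under LAND_GT_UINT16
static bool land_gt_uint16_dot (const uint16_t *a, const uint16_t *b, int64_t n)
{
    bool cij = true ;                   // identity of the LAND monoid
    for (int64_t k = 0 ; k < n ; k++)
    {
        bool t = (a [k] > b [k]) ;      // multiply: t = (aki > bkj)
        cij = (cij && t) ;              // add: cij = (cij && t)
    }
    return cij ;                        // true iff a [k] > b [k] for every k
}

int main (void)
{
    uint16_t a [3] = { 4, 7, 9 } ;
    uint16_t b [3] = { 1, 2, 3 } ;
    printf ("%d\n", (int) land_gt_uint16_dot (a, b, 3)) ;   // prints 1
    return 0 ;
}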
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint32.c new file mode 100644 index 0000000000..d5fc75e3e3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_uint32 +// A'*B function: GB_AdotB__land_gt_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
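The masked branch that follows relies on a three-state protocol in the Flag workspace: 0 means Mask(i,j) is not present, a positive value means the mask entry is present but C(i,j) has not been computed yet, and -1 means C(i,j) already holds a value in w. The sketch below is a simplified restatement of that update step under those assumptions; the helper name masked_update is illustrative only.

#include <stdbool.h>
#include <stdint.h>

// illustrative helper, not library code: apply one multiply result t to w [i]
static void masked_update (int8_t *Flag, bool *w, int64_t i, bool t)
{
    if (Flag [i] == 0)
    {
        // Mask(i,j) is not present: this contribution is dropped
        return ;
    }
    else if (Flag [i] > 0)
    {
        // first contribution to C(i,j): start the accumulator at t
        Flag [i] = -1 ;
        w [i] = t ;
    }
    else
    {
        // later contribution: LAND-accumulate into the existing value
        w [i] = (w [i] && t) ;
    }
}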
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint64.c new file mode 100644 index 0000000000..e619854a4e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_uint64 +// A'*B function: GB_AdotB__land_gt_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
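In the WITH_ZOMBIES gather further down, a mask entry with no corresponding result from A*B is kept in the pattern as a "zombie": its row index is stored as FLIP (i) and C->nzombies is incremented, so the entry can be pruned later without reallocating C. The real FLIP macro is defined in GB.h and its exact encoding may differ; the sketch below only assumes a sign-based, self-inverse marking, and FLIP_DEMO is a hypothetical stand-in.

#include <assert.h>
#include <stdint.h>

// illustrative stand-in for the FLIP macro; assumed encoding i -> -i-2
#define FLIP_DEMO(i) (-(i)-2)

int main (void)
{
    int64_t i = 5 ;
    int64_t z = FLIP_DEMO (i) ;         // mark row 5 as a zombie: z == -7
    assert (z < 0) ;                    // zombies are recognizable by sign
    assert (FLIP_DEMO (z) == i) ;       // the encoding is its own inverse
    return 0 ;
}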
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint8.c new file mode 100644 index 0000000000..8ff643d9b0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_gt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_gt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_gt_uint8 +// A'*B function: GB_AdotB__land_gt_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
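When no Mask is given, the pattern of C has already been computed by GB_AxB_symbolic, so the unmasked branch later in this kernel only has to fill in values: clear the workspace w to the monoid identity at the pattern positions of C(:,j), accumulate A(:,k)*B(k,j) for every entry B(k,j), and gather w back into Cx. The standalone sketch below restates that pass for one column under those assumptions; unmasked_column is an illustrative name, not the generated routine.

#include <stdbool.h>
#include <stdint.h>

// illustrative helper, not library code: compute one column C(:,j), pattern known
static void unmasked_column
(
    int64_t j,
    const int64_t *Cp, const int64_t *Ci, bool *Cx,        // C in CSC form
    const int64_t *Ap, const int64_t *Ai, const uint8_t *Ax,
    const int64_t *Bp, const int64_t *Bi, const uint8_t *Bx,
    bool *w                                                 // workspace, size nrows
)
{
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++)
    {
        w [Ci [p]] = true ;                                 // identity of LAND
    }
    for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    {
        int64_t k = Bi [p] ;
        uint8_t bkj = Bx [p] ;                              // B(k,j)
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;
            bool t = (Ax [pa] > bkj) ;                      // multiply: GT
            w [i] = (w [i] && t) ;                          // add: LAND
        }
    }
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++)
    {
        Cx [p] = w [Ci [p]] ;                               // gather C(:,j)
    }
}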
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_land_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_land_bool.c new file mode 100644 index 0000000000..ff87251670 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_land_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_land_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_land_bool +// A'*B function: GB_AdotB__land_land_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
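For this boolean semiring the multiply is (aik != 0) && (bkj != 0), so in the dot-product kernel GB_AdotB__land_land_bool below, C(i,j) exists only when the patterns of A(:,i) and B(:,j) intersect, and it is true only when every matching pair of entries is nonzero. The sketch below shows what the MERGE macro computes in the balanced two-pointer case: the first match initializes cij, and later matches are LAND-accumulated. It is a standalone illustration with a hypothetical name, not the generated routine.

#include <stdbool.h>
#include <stdint.h>

// illustrative helper, not library code: dot product of two sorted sparse vectors
static bool dot_land_land_bool
(
    const int64_t *Ai, const bool *Ax, int64_t anz,     // sparse A(:,i)
    const int64_t *Bi, const bool *Bx, int64_t bnz,     // sparse B(:,j)
    bool *cij_exists                                     // set true if any index matches
)
{
    bool cij = false ;
    *cij_exists = false ;
    int64_t pa = 0, pb = 0 ;
    while (pa < anz && pb < bnz)
    {
        if      (Ai [pa] < Bi [pb]) pa++ ;               // A entry has no partner
        else if (Bi [pb] < Ai [pa]) pb++ ;               // B entry has no partner
        else
        {
            bool t = (Ax [pa] != 0) && (Bx [pb] != 0) ;  // multiply
            cij = (*cij_exists) ? (cij && t) : t ;       // add: first hit, then LAND
            *cij_exists = true ;
            pa++ ; pb++ ;
        }
    }
    return cij ;
}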
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } 
+ } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_bool.c new file mode 100644 index 0000000000..acbeda73e6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_bool +// A'*B function: GB_AdotB__land_le_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
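+// A minimal usage sketch (assumed names, not generated code): one way a caller
+// might reach this hard-coded kernel, assuming the built-in semiring is exposed
+// as GxB_LAND_LE_BOOL and that the dispatch in GB_AxB_builtin selects this
+// routine when the semiring's opcodes match.  The semiring computes
+// C(i,j) = AND over k of (A(i,k) <= B(k,j)).
+//
+//      GrB_Matrix A, B, C ;
+//      // ... create and fill A and B as n-by-n GrB_BOOL matrices ...
+//      GrB_Matrix_new (&C, GrB_BOOL, n, n) ;
+//      // C = A*B over the (LAND, LE) semiring
+//      GrB_mxm (C, NULL, NULL, GxB_LAND_LE_BOOL, A, B, NULL) ;
+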
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_fp32.c new file mode 100644 index 0000000000..626dec28fc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_fp32 +// A'*B function: GB_AdotB__land_le_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
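+// Note on the dot-product method above: each entry C(i,j) is the dot product
+// of column A(:,i) with column B(:,j).  The branch taken for each pair depends
+// on the two column lengths: if both columns are dense, the loop runs over all
+// nrows positions directly; if one column has more than 32 times the entries
+// of the other, the sparser column is scanned linearly and
+// GB_BINARY_TRIM_SEARCH skips ahead in the denser one; otherwise a two-pointer
+// merge handles columns of comparable sparsity.  The factor 32 is the
+// heuristic threshold hard-coded in the template.
+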
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_fp64.c new file mode 100644 index 0000000000..e1da9e93e5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_fp64 +// A'*B function: GB_AdotB__land_le_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_int16.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_int16.c new file mode 100644 index 0000000000..907d7fd3a3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_int16 +// A'*B function: GB_AdotB__land_le_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_int32.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_int32.c new file mode 100644 index 0000000000..1c205355c2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_int32 +// A'*B function: GB_AdotB__land_le_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_int64.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_int64.c new file mode 100644 index 0000000000..28da6fc929 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_int64 +// A'*B function: GB_AdotB__land_le_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
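// ---------------------------------------------------------------------------
// The masked branch that follows relies on a small sign convention in the
// Flag workspace: 0 means Mask(i,j) is absent or false, +1 means it is true
// but C(i,j) has not been computed yet, and -1 means w [i] already holds a
// partial result.  A condensed sketch of that state machine (simplified from
// the loop below; zombie handling and mask typecasting are omitted, and the
// function name is made up for illustration):

#include <stdbool.h>
#include <stdint.h>

// Fold one term t = (aik <= bkj) into w [i], honoring the mask state
// recorded in Flag [i].
static void masked_accumulate (int8_t *Flag, bool *w, int64_t i, bool t)
{
    int8_t flag = Flag [i] ;
    if (flag == 0)
    {
        return ;                // Mask(i,j) is zero: the term is dropped
    }
    else if (flag > 0)
    {
        Flag [i] = -1 ;         // first contribution: C(i,j) now exists
        w [i] = t ;
    }
    else
    {
        w [i] = (w [i] && t) ;  // later contribution: AND it in
    }
}
// ---------------------------------------------------------------------------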
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_int8.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_int8.c new file mode 100644 index 0000000000..0ac675ddfa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_int8 +// A'*B function: GB_AdotB__land_le_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
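// ---------------------------------------------------------------------------
// When WITH_ZOMBIES is enabled, the masked kernel copies the Mask pattern
// straight into C and turns mask positions that receive no contribution from
// A*B into zombies: their row indices are stored flipped to a negative value
// so a later assembly step (after GB_queue_insert) can delete them in place.
// The exact FLIP/UNFLIP macros live in GB.h; the -(i)-2 encoding below is
// only an assumed, self-inverse example of such a flip.

#include <stdint.h>

// Hypothetical zombie index encoding: maps i >= 0 to a negative value and
// back, with FLIP_SKETCH (FLIP_SKETCH (i)) == i.
#define FLIP_SKETCH(i)      (-(i) - 2)
#define IS_ZOMBIE_SKETCH(i) ((i) < 0)

static int64_t unflip_sketch (int64_t i)
{
    return IS_ZOMBIE_SKETCH (i) ? FLIP_SKETCH (i) : i ;
}
// ---------------------------------------------------------------------------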
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint16.c new file mode 100644 index 0000000000..3c48953723 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_uint16 +// A'*B function: GB_AdotB__land_le_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
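// ---------------------------------------------------------------------------
// The dot-product form of this semiring (GB_AdotB__land_le_uint16, later in
// this file) picks one of several inner loops per column pair: both vectors
// dense, one dense and one sparse, or both sparse, with the all-sparse case
// further split by a 32x imbalance heuristic.  The balanced sparse case is an
// ordinary two-pointer merge of the two sorted index lists; a standalone
// sketch follows (names and signature are illustrative, not library API).

#include <stdbool.h>
#include <stdint.h>

// Merge the sorted row indices of A(:,i) and B(:,j); wherever they match,
// fold t = (aki <= bkj) into the AND monoid.  Returns the dot product and
// reports whether any matching entry was found at all.
static bool sparse_dot_land_le (
    const int64_t *Ai, const uint16_t *Ax, int64_t pa, int64_t pa_end,
    const int64_t *Bi, const uint16_t *Bx, int64_t pb, int64_t pb_end,
    bool *exists)
{
    bool cij = true ;                   // AND identity
    *exists = false ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa] ;
        int64_t ib = Bi [pb] ;
        if (ia < ib)
        {
            pa++ ;                      // A(ia,i) has no partner in B(:,j)
        }
        else if (ib < ia)
        {
            pb++ ;                      // B(ib,j) has no partner in A(:,i)
        }
        else
        {
            cij = (cij && (Ax [pa] <= Bx [pb])) ;
            *exists = true ;
            pa++ ;
            pb++ ;
        }
    }
    return cij ;
}
// ---------------------------------------------------------------------------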
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint32.c new file mode 100644 index 0000000000..10b09eecdb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_uint32 +// A'*B function: GB_AdotB__land_le_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
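// ---------------------------------------------------------------------------
// When one of the two index lists in the dot product is more than 32 times
// longer than the other, the kernel stops advancing it one entry at a time
// and instead jumps ahead with GB_BINARY_TRIM_SEARCH.  A plain lower-bound
// search captures the idea; this is a stand-in sketch, since the real macro
// narrows its pleft/pright arguments in place rather than returning a value.

#include <stdint.h>

// Return the first position p in [pleft, pright) with Ind [p] >= target,
// or pright if no such position exists.  Ind must be sorted ascending.
static int64_t trim_search_sketch (int64_t target, const int64_t *Ind,
    int64_t pleft, int64_t pright)
{
    while (pleft < pright)
    {
        int64_t pmiddle = pleft + (pright - pleft) / 2 ;
        if (Ind [pmiddle] < target)
        {
            pleft = pmiddle + 1 ;       // entire left half is < target
        }
        else
        {
            pright = pmiddle ;          // Ind [pmiddle] is a candidate
        }
    }
    return pleft ;
}
// ---------------------------------------------------------------------------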
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint64.c new file mode 100644 index 0000000000..c44915d030 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_uint64 +// A'*B function: GB_AdotB__land_le_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_le_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint8.c new file mode 100644 index 0000000000..f5d50513f1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_le_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_le_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_le_uint8 +// A'*B function: GB_AdotB__land_le_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_lor_bool.c new file mode 100644 index 0000000000..c61c085d52 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lor_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lor_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lor_bool +// A'*B function: GB_AdotB__land_lor_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } 
+ } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_bool.c new file mode 100644 index 0000000000..f7b8000f17 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_bool +// A'*B function: GB_AdotB__land_lt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
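The generated kernels above all instantiate one template: the multiplicative operator is a comparison (LE, LT, or LOR in the boolean case) and the additive monoid is logical AND with identity true. As a minimal, self-contained sketch — not part of this patch, and using a hypothetical helper name, land_le_dot, rather than any GraphBLAS API — the following C program shows the scalar behavior that the MERGE macro and the dot-product loops encode for the LAND_LE_UINT32 semiring: walk two sorted sparse vectors and, for each shared index, apply t = (a <= b) and fold it into the result with &&.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Illustrative sketch only (hypothetical helper, not a GraphBLAS function):
// reduce two sorted sparse vectors with the LAND_LE_UINT32 semiring.
// "multiply": t = (a <= b) ; "add": cij = (cij && t), with identity true.
// The result exists only if the two index patterns intersect.
static bool land_le_dot
(
    const int64_t *Ai, const uint32_t *Ax, int64_t anz,    // vector A(:,i)
    const int64_t *Bi, const uint32_t *Bx, int64_t bnz,    // vector B(:,j)
    bool *cij_exists
)
{
    bool cij = true ;                   // identity of the LAND monoid
    (*cij_exists) = false ;
    int64_t pa = 0, pb = 0 ;
    while (pa < anz && pb < bnz)
    {
        if (Ai [pa] < Bi [pb]) pa++ ;           // A entry has no match in B
        else if (Bi [pb] < Ai [pa]) pb++ ;      // B entry has no match in A
        else
        {
            // same row index: apply the semiring, as in the MERGE macro
            bool t = (Ax [pa++] <= Bx [pb++]) ; // multiply: a <= b
            cij = (cij && t) ;                  // add: logical AND
            (*cij_exists) = true ;
        }
    }
    return (cij) ;
}

int main (void)
{
    // hypothetical data: only indices 2 and 5 appear in both vectors
    int64_t  Ai [3] = { 0, 2, 5 } ;  uint32_t Ax [3] = { 1, 7, 3 } ;
    int64_t  Bi [3] = { 2, 3, 5 } ;  uint32_t Bx [3] = { 9, 1, 2 } ;
    bool exists ;
    bool cij = land_le_dot (Ai, Ax, 3, Bi, Bx, 3, &exists) ;
    // (7 <= 9) && (3 <= 2) is false, so the entry exists with value false
    printf ("exists = %d, cij = %d\n", exists, cij) ;
    return (0) ;
}

The same skeleton yields every variant in this group of files; only the entry type and the comparison in the multiply step change, which is why these sources are generated from a single template (Template/GB*AxB.[ch]) rather than edited by hand.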
diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_fp32.c new file mode 100644 index 0000000000..40726731f6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_fp32 +// A'*B function: GB_AdotB__land_lt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_fp64.c new file mode 100644 index 0000000000..b934c30263 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_fp64 +// A'*B function: GB_AdotB__land_lt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
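+    //
+    // Illustration (hypothetical values, for exposition only): with the
+    // semiring defined above, one saxpy step of the outer-product method
+    // multiplies with "<" and adds with "&&".  Suppose A(3,k) = 1.5 and
+    // B(k,j) = 2.0: the multiply gives t = (1.5 < 2.0) = true, and the add
+    // folds it into the workspace as w [3] = (w [3] && true).  If a later
+    // A(3,k2) = 4.0 meets B(k2,j) = 2.0, then t = false and w [3] becomes
+    // false.  The gather phase below copies w into Cx for the pattern of
+    // C(:,j).  The entry values 1.5, 2.0, and 4.0 are made up for this note.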
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int16.c new file mode 100644 index 0000000000..16ae765039 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_int16 +// A'*B function: GB_AdotB__land_lt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int32.c new file mode 100644 index 0000000000..ff26bef2c2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_int32 +// A'*B function: GB_AdotB__land_lt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
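+    //
+    // Note on the companion dot-product kernel (GB_AdotB__land_lt_int32,
+    // defined later in this file): each C(i,j) = A(:,i)'*B(:,j) is computed
+    // by a case analysis on the sparsity of the two columns: both dense; one
+    // dense and the other sparse; one pattern more than 32 times denser than
+    // the other, in which case the denser pattern is advanced with
+    // GB_BINARY_TRIM_SEARCH instead of a linear scan; or a two-pointer merge
+    // when the entry counts are comparable.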
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int64.c new file mode 100644 index 0000000000..f8de86ec22 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_int64 +// A'*B function: GB_AdotB__land_lt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
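+    //
+    // Note on the Flag workspace used in the masked phase below: Flag is an
+    // int8_t array that arrives cleared to zero.  While C(:,j) is computed,
+    // Flag [i] is in one of three states: 0 means Mask(i,j) is not present
+    // (or is false), so the entry is skipped; a positive value means
+    // Mask(i,j) is true but nothing has been accumulated into w [i] yet;
+    // -1 means w [i] already holds a partial result for C(i,j).  The gather
+    // loop turns Flag [i] < 0 entries into live entries of C and, when
+    // zombies are enabled, the remaining Mask entries into zombies, then
+    // resets Flag [i] to zero for the next column.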
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int8.c new file mode 100644 index 0000000000..fb4cbf0168 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_int8 +// A'*B function: GB_AdotB__land_lt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint16.c new file mode 100644 index 0000000000..48082b6679 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_uint16 +// A'*B function: GB_AdotB__land_lt_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
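+    // Outline of this generated kernel: for each column j of B, the products
+    // t = (A(i,k) < B(k,j)) are folded into the dense workspace w with the
+    // AND monoid, w [i] = (w [i] && t), whose identity is true.  For example,
+    // A(i,k)=3 and B(k,j)=5 give t = (3 < 5) = true and leave w [i] unchanged,
+    // while A(i,k)=7 and B(k,j)=5 give t = false and force C(i,j) to false.
+    // The masked branch below writes C directly into the pattern of the Mask;
+    // the unmasked branch reuses the pattern already computed symbolically in
+    // C->p and C->i.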
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint32.c new file mode 100644 index 0000000000..eac8fe51c4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_uint32 +// A'*B function: GB_AdotB__land_lt_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
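+    // When compiled with WITH_ZOMBIES, the masked branch below copies the
+    // Mask pattern directly into C->p, and any Mask entry that receives no
+    // product becomes a zombie: its row index is stored as FLIP (i) and
+    // C->nzombies is incremented, and C is placed on the queue so the zombies
+    // can be pruned later.  Without WITH_ZOMBIES, only live entries are
+    // appended to C and Cp [j] records the running count cnz.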
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint64.c new file mode 100644 index 0000000000..cd348194be --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_uint64 +// A'*B function: GB_AdotB__land_lt_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
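+    // The Flag workspace gives each row i a three-state marker for the
+    // current column j: 0 means Mask(i,j) is not present, so the product is
+    // skipped; a positive value (set by scatter_mask) means Mask(i,j) is
+    // present but no product has been accumulated yet; and -1 means w [i]
+    // already holds a partial value for C(i,j).  The gather loops reset Flag
+    // back to 0 so the workspace is clean for the next column.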
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint8.c new file mode 100644 index 0000000000..4990af833d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lt_uint8 +// A'*B function: GB_AdotB__land_lt_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
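+    // In the masked branch below, A(:,k) is skipped outright when its row
+    // range [alo,ahi] (reported by the empty helper) does not overlap the row
+    // range [mlo,mhi] of Mask(:,j), so no products are computed for columns
+    // of A that cannot contribute to the masked pattern of C(:,j).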
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_lxor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_lxor_bool.c new file mode 100644 index 0000000000..34ad8230e1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_lxor_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_lxor_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_lxor_bool +// A'*B function: GB_AdotB__land_lxor_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
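    //--------------------------------------------------------------------------
    // For context, a minimal usage sketch (illustration only, not produced by
    // the axb*.m scripts): user code normally reaches this kernel through
    // GrB_mxm with the corresponding built-in semiring.  The name
    // GxB_LAND_LXOR_BOOL is assumed here to be the predefined
    // (LAND monoid, LXOR multiply, bool) semiring in GraphBLAS.h, and GrB_init
    // is assumed to have been called already:
    //
    //      GrB_Matrix A, B, C ;
    //      GrB_Matrix_new (&A, GrB_BOOL, n, n) ;
    //      GrB_Matrix_new (&B, GrB_BOOL, n, n) ;
    //      GrB_Matrix_new (&C, GrB_BOOL, n, n) ;
    //      // ... populate A and B with GrB_Matrix_setElement_BOOL ...
    //      GrB_mxm (C, NULL, NULL, GxB_LAND_LXOR_BOOL, A, B, NULL) ;
    //
    // When that semiring is recognized, the library can select this
    // hard-coded kernel (GB_AxB__land_lxor_bool for the outer product, or
    // GB_AdotB__land_lxor_bool for C=A'*B) instead of the generic method.
    //--------------------------------------------------------------------------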
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } 
+ } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_fp32.c new file mode 100644 index 0000000000..8173fdc512 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_fp32 +// A'*B function: GB_AdotB__land_ne_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
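    //--------------------------------------------------------------------------
    // To make the semiring concrete: each term contributes t = (aik != bkj)
    // and terms are combined with AND, so in the dot-product kernel
    // GB_AdotB__land_ne_fp32 below, C(i,j) is present only when the patterns
    // of A(:,i) and B(:,j) intersect, and its value is the AND over the
    // shared row indices k of (A(k,i) != B(k,j)).  A small worked example
    // (illustration only):
    //
    //      A(:,i) = { 1: 1.0, 3: 2.0 }      B(:,j) = { 3: 5.0, 7: 0.5 }
    //      shared k = {3} :  t = (2.0 != 5.0) = true,  so C(i,j) = true
    //
    // Entries present in only one of the two columns (k = 1 and k = 7 here)
    // are skipped by the merge and contribute no term.
    //--------------------------------------------------------------------------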
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
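//------------------------------------------------------------------------------
// For illustration only (not produced by the axb*.m scripts): a self-contained
// sketch of the adaptive merge used by the dot-product kernel above.  When one
// sorted index list has more than 32 times as many entries as the other, the
// shorter list is walked linearly and the longer one is advanced by binary
// search (the role GB_BINARY_TRIM_SEARCH plays); otherwise a plain linear
// merge is used.  The helper below only counts matching indices, standing in
// for the MERGE action.
//------------------------------------------------------------------------------

#include <stdbool.h>
#include <stdint.h>

// advance to the first position p in the sorted list X [p .. pend-1]
// with X [p] >= target (returns pend if no such position exists)
static int64_t trim_search (int64_t target, const int64_t *X,
    int64_t p, int64_t pend)
{
    while (p < pend)
    {
        int64_t pmid = p + (pend - p) / 2 ;
        if (X [pmid] < target) { p = pmid + 1 ; } else { pend = pmid ; }
    }
    return (p) ;
}

// count the indices common to two sorted lists Ai [0..anz-1] and Bi [0..bnz-1]
static int64_t intersect_count (const int64_t *Ai, int64_t anz,
    const int64_t *Bi, int64_t bnz)
{
    int64_t pa = 0, pb = 0, count = 0 ;
    bool skip_in_a = (anz > 32 * bnz) ;     // A(:,i) much denser than B(:,j)
    bool skip_in_b = (bnz > 32 * anz) ;     // B(:,j) much denser than A(:,i)
    while (pa < anz && pb < bnz)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib)
        {
            // A index appears first: step, or binary-search ahead in Ai
            pa = skip_in_a ? trim_search (ib, Ai, pa + 1, anz) : (pa + 1) ;
        }
        else if (ib < ia)
        {
            // B index appears first: step, or binary-search ahead in Bi
            pb = skip_in_b ? trim_search (ia, Bi, pb + 1, bnz) : (pb + 1) ;
        }
        else
        {
            // shared index: this is where the kernel applies MERGE
            count++ ; pa++ ; pb++ ;
        }
    }
    return (count) ;
}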
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_fp64.c new file mode 100644 index 0000000000..2cba6bc41e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_fp64 +// A'*B function: GB_AdotB__land_ne_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int16.c new file mode 100644 index 0000000000..d1b90a7eba --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_int16 +// A'*B function: GB_AdotB__land_ne_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
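    //--------------------------------------------------------------------------
    // A note on the masked path below: scatter_mask marks Flag [i] positive
    // for each row i with Mask(i,j) true, and the per-row state then evolves
    // as
    //
    //      Flag [i] == 0  : Mask(i,j) false (or not yet scattered); skip
    //      Flag [i] > 0   : first contribution;  Flag [i] = -1, w [i] = t
    //      Flag [i] < 0   : later contribution;  w [i] = (w [i] && t)
    //
    // When WITH_ZOMBIES is defined, C keeps the full Mask pattern: mask
    // entries that receive no contribution from A*B are stored with a flipped
    // index, FLIP (i), counted in C->nzombies, and C is placed on the queue
    // (GB_queue_insert) so the zombies can be removed later.  Without
    // WITH_ZOMBIES, only the live entries are gathered and Cp is rebuilt from
    // cnz.  Any Flag entries that were scattered are reset to zero so the
    // workspace can be reused for the next column j.
    //--------------------------------------------------------------------------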
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int32.c new file mode 100644 index 0000000000..a84958d74c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_int32 +// A'*B function: GB_AdotB__land_ne_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
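    //--------------------------------------------------------------------------
    // A note on the LAND monoid used here: its identity is true (as stated in
    // the header above), which is why the unmasked path below initializes w
    // over the pattern of C(:,j) with true before accumulating.  Since
    // (false && t) is false for any t, false is absorbing: once an entry has
    // seen a term with A(k,i) == B(k,j) it stays false no matter how many
    // further terms are folded in.  In scalar form (illustration only):
    //
    //      w [i] = true ;                          // the LAND identity
    //      for each product term t : w [i] = (w [i] && t) ;
    //--------------------------------------------------------------------------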
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int64.c new file mode 100644 index 0000000000..d8cb1af870 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_int64 +// A'*B function: GB_AdotB__land_ne_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
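+    // Note (descriptive, not generated): w acts as a dense accumulator over
+    // the rows of C under the LAND monoid.  Because the monoid identity is
+    // true, "clearing" w means setting its entries to true.  Each pair
+    // A(i,k), B(k,j) contributes t = (aik != bkj), folded in as
+    // w [i] = (w [i] && t).  For example, if aik == bkj for some k then t is
+    // false and w [i] stays false from then on, since false is absorbing
+    // for AND.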
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int8.c new file mode 100644 index 0000000000..ce6f08c997 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_int8 +// A'*B function: GB_AdotB__land_ne_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
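+    // Note (descriptive) on the masked branch below: when compiled with
+    // WITH_ZOMBIES, C->p is copied from Maskp, so C takes the Mask's pattern
+    // up front; any Mask position not produced by A*B is kept as a zombie,
+    // its row index stored as FLIP (i) and counted in C->nzombies.  Without
+    // WITH_ZOMBIES, live entries are instead compacted into C using the
+    // running count cnz.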
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint16.c new file mode 100644 index 0000000000..6b66c53cb7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_uint16 +// A'*B function: GB_AdotB__land_ne_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
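+    // Note (descriptive) on the Flag workspace used in the masked loop
+    // below: Flag [i] == 0 means Mask(i,j) is zero, so the contribution of
+    // A(i,k)*B(k,j) is skipped; Flag [i] > 0 means Mask(i,j) is one but
+    // C(i,j) has not yet been computed; the first contribution sets
+    // Flag [i] = -1 and w [i] = t.  The gather phase resets Flag [i] to 0
+    // so the workspace is clean for the next column j.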
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint32.c new file mode 100644 index 0000000000..bcc19dd77f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_uint32 +// A'*B function: GB_AdotB__land_ne_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
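+    // Note (descriptive): the companion dot-product kernel,
+    // GB_AdotB__land_ne_uint32 below, picks a strategy per C(i,j): fully
+    // dense A(:,i) and B(:,j) are scanned directly, a dense column is
+    // indexed by the other column's row indices, and when one column has
+    // more than 32 times the entries of the other, GB_BINARY_TRIM_SEARCH
+    // skips ahead in the longer column; otherwise a two-pointer merge is
+    // used.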
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint64.c new file mode 100644 index 0000000000..4e39c4367a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_uint64 +// A'*B function: GB_AdotB__land_ne_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
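+    // Note (descriptive) on the MERGE macro in the dot-product kernel
+    // below: cij_exists records whether any index k appears in both A(:,i)
+    // and B(:,j).  The first match initializes cij = t, later matches fold
+    // in cij = (cij && t), and C(i,j) is appended to the pattern
+    // (Ci [cnz++] = i) only when cij_exists is true.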
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint8.c new file mode 100644 index 0000000000..d2664367c5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_ne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_ne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_ne_uint8 +// A'*B function: GB_AdotB__land_ne_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
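+
+    // Editor's note (illustration only, not produced by the axb*.m scripts;
+    // the names below are the ones used in this function): the masked
+    // outer-product phase uses two thread-local workspaces.  w [i] holds the
+    // running monoid value for C(i,j) of the current column j, and Flag [i]
+    // records the state of row i for that column:
+    //
+    //      Flag [i] ==  0   Mask(i,j) not present (or false); skip row i
+    //      Flag [i] ==  1   Mask(i,j) present, but C(i,j) not yet seen
+    //      Flag [i] == -1   C(i,j) has been seen; w [i] holds its value
+    //
+    // In pseudocode, the inner saxpy step for one entry B(k,j) is roughly:
+    //
+    //      for each A(i,k) in A(:,k):
+    //          if Flag [i] == 0: continue                   // not in the mask
+    //          t = (A(i,k) != B(k,j))                       // the NE multiply
+    //          if Flag [i] > 0: w [i] = t ; Flag [i] = -1   // first time seen
+    //          else:            w [i] = w [i] && t          // LAND monoid
+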
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__land_second_bool.c b/GraphBLAS/Source/Generated/GB_AxB__land_second_bool.c new file mode 100644 index 0000000000..7b500c26ae --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__land_second_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__land_second_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__land_second_bool +// A'*B function: GB_AdotB__land_second_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: true (where cij = (cij && true) does not change cij) +// Multiply: t = (bkj) +// Add: cij = (cij && t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__land_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
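+
+    // Editor's note (illustration only, not part of the generated template):
+    // the multiplicative operator of this semiring is SECOND(x,y) = y, so the
+    // product t depends only on B(k,j).  The value aik = A(i,k) is still
+    // loaded, presumably so this file keeps the same shape as the other
+    // generated kernels, but it does not affect the result.  With the LAND
+    // monoid, the scalar recurrence for one entry of C is simply
+    //
+    //      cij = cij && B(k,j)
+    //
+    // taken over all k for which both A(i,k) and B(k,j) are present.
+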
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] && t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = true ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = true ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = bkj ; + w [i] = (w [i] && t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__land_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij && t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij && t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij && t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = true ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij && t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_bool.c new file mode 100644 index 0000000000..4423387f5b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_eq_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_eq_bool +// A'*B function: GB_AdotB__lor_eq_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
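+
+    // Editor's note (illustration only, not produced by the axb*.m scripts):
+    // with the LOR monoid (identity false) and the EQ multiply, each entry of
+    // C reduces to an existence test over the shared pattern:
+    //
+    //      C(i,j) = OR over k of ( A(i,k) == B(k,j) )
+    //
+    // that is, C(i,j) is true iff some k has both A(i,k) and B(k,j) present
+    // and equal.  The identity false is what makes "cij = (cij || false)"
+    // leave cij unchanged, as stated in the header comment above.
+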
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp32.c new file mode 100644 index 0000000000..295f67ba8a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_eq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_eq_fp32 +// A'*B function: GB_AdotB__lor_eq_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
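+
+    // Editor's note (illustration only): the EQ multiply below compares the
+    // float values with the C operator ==, i.e. exact IEEE-754 equality.
+    // Values that differ only by rounding compare as unequal, and a NaN in
+    // either A or B yields t == false for that term.  No tolerance is
+    // applied; approximate matching would require a different (user-defined)
+    // operator.
+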
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
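+
+// Editor's note (illustration only, not produced by the axb*.m scripts): the
+// dot-product kernel above picks a strategy for each C(i,j) based on how full
+// A(:,i) and B(:,j) are.  If either vector is dense it is indexed directly;
+// if one is more than 32 times sparser than the other, the denser one is
+// advanced with a trimmed binary search (GB_BINARY_TRIM_SEARCH) instead of
+// stepping one entry at a time; otherwise a plain two-pointer merge is used.
+// In every case, matched index pairs feed the MERGE macro, which applies the
+// multiply and folds the result into cij with the monoid:
+//
+//      while pa < pa_end and pb < pb_end:
+//          if      Ai [pa] < Bi [pb]: advance pa (step or binary search)
+//          else if Bi [pb] < Ai [pa]: advance pb (step or binary search)
+//          else:                      MERGE     // indices match at k
+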
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp64.c
new file mode 100644
index 0000000000..875e0bd26c
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_fp64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_eq_fp64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_eq_fp64
+// A'*B function: GB_AdotB__lor_eq_fp64
+// Z type : bool (the type of C)
+// XY type: double (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik == bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_eq_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize.  uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int16.c
new file mode 100644
index 0000000000..00e6ea71f8
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int16.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_eq_int16: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_eq_int16
+// A'*B function: GB_AdotB__lor_eq_int16
+// Z type : bool (the type of C)
+// XY type: int16_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik == bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_eq_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize.  uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int32.c
new file mode 100644
index 0000000000..037e42cb08
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_eq_int32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_eq_int32
+// A'*B function: GB_AdotB__lor_eq_int32
+// Z type : bool (the type of C)
+// XY type: int32_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik == bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_eq_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize.  uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int64.c
new file mode 100644
index 0000000000..fe69a33ad4
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_eq_int64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_eq_int64
+// A'*B function: GB_AdotB__lor_eq_int64
+// Z type : bool (the type of C)
+// XY type: int64_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik == bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_eq_int64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize.  uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int8.c
new file mode 100644
index 0000000000..7cf58a2a50
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_int8.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_eq_int8: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_eq_int8
+// A'*B function: GB_AdotB__lor_eq_int8
+// Z type : bool (the type of C)
+// XY type: int8_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik == bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_eq_int8
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize.  uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint16.c new file mode 100644 index 0000000000..a1c6d7d7dd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_eq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_eq_uint16 +// A'*B function: GB_AdotB__lor_eq_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint32.c new file mode 100644 index 0000000000..3acee2898d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_eq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_eq_uint32 +// A'*B function: GB_AdotB__lor_eq_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint64.c new file mode 100644 index 0000000000..6a98d74aef --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_eq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_eq_uint64 +// A'*B function: GB_AdotB__lor_eq_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint8.c new file mode 100644 index 0000000000..974fefd6d1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_eq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_eq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_eq_uint8 +// A'*B function: GB_AdotB__lor_eq_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_first_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_first_bool.c new file mode 100644 index 0000000000..c5ff649896 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_first_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_first_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_first_bool +// A'*B function: GB_AdotB__lor_first_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + 
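// Illustrative sketch (not part of the generated kernel): the balanced case
// below intersects the two sorted row-index lists Ai [pa..pa_end-1] and
// Bi [pb..pb_end-1] with a two-pointer merge and applies MERGE at every
// common index.  A minimal standalone version for this semiring (the helper
// name merge_dot_lor_first is hypothetical); unlike the real kernel it
// returns only the value and does not track the cij_exists flag:

#include <stdint.h>
#include <stdbool.h>

static bool merge_dot_lor_first (
    const int64_t *Ai, const bool *Ax, int64_t pa, int64_t pa_end,
    const int64_t *Bi, int64_t pb, int64_t pb_end)
{
    bool cij = false ;
    while (pa < pa_end && pb < pb_end)
    {
        if      (Ai [pa] < Bi [pb]) pa++ ;      // A entry appears first
        else if (Bi [pb] < Ai [pa]) pb++ ;      // B entry appears first
        else                                    // common row index k
        {
            cij = cij || Ax [pa] ;              // t = FIRST (aki, bkj) = aki
            pa++ ; pb++ ;
        }
    }
    return cij ;
}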
+ // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_bool.c new file mode 100644 index 0000000000..9cb12286a8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_bool +// A'*B function: GB_AdotB__lor_ge_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
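// Illustrative sketch (not part of the generated kernel): the unmasked path
// below is a Gustavson-style outer product.  For each column j, every entry
// B(k,j) scatters column A(:,k) into a dense workspace w, accumulating with
// the monoid; C(:,j) is then gathered from w over the pattern computed by
// GB_AxB_symbolic.  A standalone single-column version for this boolean ">="
// semiring (the helper name is hypothetical):

#include <stdint.h>
#include <stdbool.h>

static void saxpy_column_lor_ge_bool (
    int64_t j, bool *w,                                   // workspace, size nrows
    const int64_t *Ap, const int64_t *Ai, const bool *Ax, // A in CSC form
    const int64_t *Bp, const int64_t *Bi, const bool *Bx, // B in CSC form
    const int64_t *Cp, const int64_t *Ci, bool *Cx)       // C, pattern known
{
    // clear the workspace over the pattern of C(:,j)
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = false ;
    // accumulate A(:,k) * B(k,j) for every entry B(k,j)
    for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    {
        int64_t k = Bi [p] ;
        bool bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;
            w [i] = w [i] || (Ax [pa] >= bkj) ;           // t = (aik >= bkj)
        }
    }
    // gather C(:,j) from the workspace
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;
}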
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp32.c new file mode 100644 index 0000000000..5950e39f6a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_fp32 +// A'*B function: GB_AdotB__lor_ge_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
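// Illustrative sketch (not part of the generated kernel): the masked path
// below keeps the pattern of Mask(:,j) in an int8_t Flag array, with
// Flag [i] = 1 meaning "allowed by the mask but not yet in C(:,j)",
// Flag [i] = -1 meaning "C(i,j) already has a value in w", and 0 meaning
// "not in the mask".  Minimal scatter/clear helpers, assuming a boolean
// mask (the real code casts Mask values of any type via GB_cast_factory):

#include <stdint.h>
#include <stdbool.h>

static void scatter_bool_mask (int64_t j, const int64_t *Maskp,
    const int64_t *Maski, const bool *Maskx, int8_t *Flag)
{
    for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++)
    {
        if (Maskx [p]) Flag [Maski [p]] = 1 ;   // Mask(i,j) allows C(i,j)
    }
}

static void clear_bool_mask (int64_t j, const int64_t *Maskp,
    const int64_t *Maski, int8_t *Flag)
{
    for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++)
    {
        Flag [Maski [p]] = 0 ;                  // restore Flag to all zero
    }
}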
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
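// Illustrative sketch (not part of the generated kernel): the dense-times-
// sparse case above exploits the fact that a dense A(:,i) with nrows entries
// stores A(k,i) at Ax [pa + k], so the sparse B(:,j) can be streamed and the
// matching A value fetched by direct indexing.  Standalone version for the
// fp32 ">=" multiplier (the helper name is hypothetical):

#include <stdint.h>
#include <stdbool.h>

static bool dense_sparse_dot_lor_ge_fp32 (
    const float *Ax, int64_t pa,                          // dense A(:,i)
    const int64_t *Bi, const float *Bx,                   // sparse B(:,j)
    int64_t pb, int64_t pb_end)
{
    bool cij = false ;
    for ( ; pb < pb_end ; pb++)
    {
        int64_t k = Bi [pb] ;                   // B(k,j) is present
        cij = cij || (Ax [pa + k] >= Bx [pb]) ; // t = (aki >= bkj)
    }
    return cij ;
}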
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp64.c new file mode 100644 index 0000000000..be03aba5d7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_fp64 +// A'*B function: GB_AdotB__lor_ge_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
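// Illustrative sketch (not part of the generated kernel): in the
// WITH_ZOMBIES variant above, a mask entry with no corresponding entry in
// A*B is kept as a "zombie": its row index is stored as FLIP (i), a negative
// code, C->nzombies is incremented, and the matrix is put on the queue of
// matrices with pending work (GB_queue_insert) so the zombies can be deleted
// later.  A minimal flip/unflip pair, assuming the involution -(i)-2 (the
// actual macro is defined in GB.h):

#include <stdint.h>
#include <stdbool.h>

static inline int64_t flip_index (int64_t i) { return (-(i) - 2) ; }
static inline bool    is_zombie  (int64_t i) { return (i < 0) ; }
static inline int64_t unflip_index (int64_t i)
{
    return (is_zombie (i) ? flip_index (i) : i) ;
}
// flip_index (flip_index (i)) == i, and flip_index (i) < 0 for all i >= 0,
// so live row indices and zombie row indices can never collide.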
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int16.c new file mode 100644 index 0000000000..d2eb0b260b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_int16 +// A'*B function: GB_AdotB__lor_ge_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
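// Illustrative sketch (not part of the generated kernel): when one pattern
// is much denser than the other (ainz > 32 * bjnz, or the reverse), the
// merge below advances through the denser list with a binary search rather
// than one entry at a time; that is the role GB_BINARY_TRIM_SEARCH plays.
// A standalone lower-bound search over a sorted index list (hypothetical
// helper, shown only to illustrate the idea):

#include <stdint.h>

// returns the smallest p in [pleft, pright) with Ai [p] >= target,
// or pright if every entry is smaller than target
static int64_t lower_bound_index (const int64_t *Ai,
    int64_t pleft, int64_t pright, int64_t target)
{
    while (pleft < pright)
    {
        int64_t pmid = pleft + (pright - pleft) / 2 ;
        if (Ai [pmid] < target) pleft = pmid + 1 ;
        else                    pright = pmid ;
    }
    return pleft ;
}
// The 32x cutoff trades the O(log n) cost of each search against the O(1)
// cost of a single pointer increment in the balanced merge.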
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int32.c new file mode 100644 index 0000000000..7affbb7bb4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_int32 +// A'*B function: GB_AdotB__lor_ge_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
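+// Illustrative note (commentary, not part of the generated template): for the
+// LOR_GE_INT32 semiring the "multiply" is an int32_t comparison and the "add"
+// is a logical OR, so each scalar update in this kernel has the form
+//
+//      bool t = (aik >= bkj) ;     // multiply: t = A(i,k) >= B(k,j)
+//      cij = (cij || t) ;          // add: OR t into the running result
+//
+// Since false is the identity of the OR monoid, the unmasked path can simply
+// clear the workspace w to false for each column before accumulating into it.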
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int64.c new file mode 100644 index 0000000000..613ce99edc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_int64 +// A'*B function: GB_AdotB__lor_ge_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
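+// Illustrative note (commentary, not part of the generated template): in the
+// masked path below, Flag acts as a small per-row state machine scattered from
+// Mask(:,j) before each column is computed:
+//
+//      Flag [i] ==  0   C(i,j) is not permitted by the mask and is skipped
+//      Flag [i]  >  0   the mask permits C(i,j) but no term has been seen yet;
+//                       the first contribution sets Flag [i] = -1 and w [i] = t
+//      Flag [i]  <  0   C(i,j) already has a value, so later terms OR into w [i]
+//
+// The gather loop then resets Flag [i] to zero so the same workspace can be
+// reused for the next column.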
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int8.c new file mode 100644 index 0000000000..bff58a4ae3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_int8 +// A'*B function: GB_AdotB__lor_ge_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
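+// Illustrative note (commentary, not part of the generated template): the
+// dot-product kernel GB_AdotB below chooses one of six cases for each C(i,j),
+// based on the entry counts of A(:,i) (ainz) and B(:,j) (bjnz):
+//
+//      both dense           loop over all nrows positions directly
+//      A(:,i) dense         loop over the entries of B(:,j), index into A(:,i)
+//      B(:,j) dense         loop over the entries of A(:,i), index into B(:,j)
+//      ainz > 32 * bjnz     advance through A(:,i) with GB_BINARY_TRIM_SEARCH
+//      bjnz > 32 * ainz     the symmetric case, searching within B(:,j)
+//      otherwise            a two-pointer merge of the two sorted index lists
+//
+// The factor 32 is the heuristic this kernel uses to decide when a binary
+// search is likely to beat a linear merge.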
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint16.c new file mode 100644 index 0000000000..d4a659e4af --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_uint16 +// A'*B function: GB_AdotB__lor_ge_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
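+// Illustrative note (commentary, not part of the generated template): the
+// MERGE macro used by GB_AdotB below handles the case where the row indices
+// ia == ib match while merging A(:,i) and B(:,j).  A minimal sketch of what
+// one expansion does:
+//
+//      bool t = (Ax [pa++] >= Bx [pb++]) ;       // multiply the matched pair
+//      if (cij_exists) cij = (cij || t) ;        // OR into an existing result
+//      else { cij_exists = true ; cij = t ; }    // first term defines C(i,j)
+//
+// C(i,j) is appended to the output (Cx [cnz], Ci [cnz++]) only if at least one
+// matching index pair was found, that is, only if cij_exists ends up true.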
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint32.c new file mode 100644 index 0000000000..067caaf868 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_uint32 +// A'*B function: GB_AdotB__lor_ge_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
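+// Illustrative note (commentary, not part of the generated template): when
+// WITH_ZOMBIES is defined, the masked path below gives C exactly the pattern
+// of the Mask.  Positions present in the Mask but receiving no contribution
+// from A*B are kept as "zombies": their row index is stored as FLIP (i),
+// C->nzombies is incremented, and GB_queue_insert (C) queues the matrix so the
+// zombies can be pruned later, outside this kernel.  Without WITH_ZOMBIES,
+// only live entries are copied into C and Cp is built column by column from
+// the running count cnz.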
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint64.c new file mode 100644 index 0000000000..5825a8a4c1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_uint64 +// A'*B function: GB_AdotB__lor_ge_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
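+    // The masked kernel below uses a saxpy-style (Gustavson) formulation:
+    // for each column j, each entry B(k,j) scatters the column A(:,k) into
+    // the workspace w under the semiring multiply, and the Flag workspace
+    // records which rows appear in Mask(:,j) (Flag [i] > 0) and which have
+    // already received a value (Flag [i] < 0).  Under WITH_ZOMBIES, C keeps
+    // the full pattern of the Mask: Mask entries not produced by A*B are
+    // stored as zombies, with row index FLIP (i) and counted in C->nzombies.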
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint8.c new file mode 100644 index 0000000000..9d01a9f8b0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ge_uint8 +// A'*B function: GB_AdotB__lor_ge_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_bool.c new file mode 100644 index 0000000000..393e892396 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_gt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_gt_bool +// A'*B function: GB_AdotB__lor_gt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
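The generated GB_AdotB__* kernels above and below all share the same dot-product structure: for each C(i,j), the sparse columns A(:,i) and B(:,j) are intersected, each matching pair of entries is combined with the semiring multiply (here a comparison), and the results are folded with logical OR. When one column has more than 32 times the entries of the other, the kernel advances through the denser column with a trimmed binary search (GB_BINARY_TRIM_SEARCH) instead of stepping one entry at a time. The standalone sketch below illustrates that merge for the uint32 LOR_GE variant shown earlier in this patch; it is an illustration only, not part of the patch, and the helper names dot_lor_ge and trim_search are invented for this note.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// lower-bound binary search: first position p in List [lo..hi-1] with
// List [p] >= target (a simplified stand-in for GB_BINARY_TRIM_SEARCH)
static int64_t trim_search (int64_t target, const int64_t *List,
    int64_t lo, int64_t hi)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (List [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return (lo) ;
}

// cij = OR over the pattern intersection of (A(k,i) >= B(k,j)).
// Ai,Ax hold the sorted row indices and values of the sparse column A(:,i);
// Bi,Bx hold those of B(:,j).  *exists reports whether any index matched.
static bool dot_lor_ge (const int64_t *Ai, const uint32_t *Ax, int64_t anz,
    const int64_t *Bi, const uint32_t *Bx, int64_t bnz, bool *exists)
{
    bool cij = false ;
    (*exists) = false ;
    int64_t pa = 0, pb = 0 ;
    while (pa < anz && pb < bnz)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib)
        {
            // A(ia,i) comes first; if A(:,i) is far denser than B(:,j),
            // gallop ahead with a binary search instead of one step
            pa = (anz > 32 * bnz) ? trim_search (ib, Ai, pa+1, anz) : pa+1 ;
        }
        else if (ib < ia)
        {
            pb = (bnz > 32 * anz) ? trim_search (ia, Bi, pb+1, bnz) : pb+1 ;
        }
        else
        {
            // indices match: semiring "multiply" then "add"
            bool t = (Ax [pa] >= Bx [pb]) ;     // t = A(k,i) >= B(k,j)
            cij = (cij || t) ;                  // cij = (cij || t)
            (*exists) = true ;
            pa++ ;
            pb++ ;
        }
    }
    return (cij) ;
}

int main (void)
{
    // A(:,i) has entries in rows {0,2,5}; B(:,j) has entries in rows {2,5,7}
    int64_t  Ai [3] = {0, 2, 5} ;   uint32_t Ax [3] = {4, 1, 9} ;
    int64_t  Bi [3] = {2, 5, 7} ;   uint32_t Bx [3] = {3, 9, 2} ;
    bool exists ;
    bool cij = dot_lor_ge (Ai, Ax, 3, Bi, Bx, 3, &exists) ;
    printf ("exists=%d cij=%d\n", exists, cij) ;
    return (0) ;
}

Running it prints exists=1 cij=1: rows 2 and 5 intersect, (1 >= 3) is false but (9 >= 9) is true, so the OR accumulates to true, the same update the MERGE macro performs in the generated kernels.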
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp32.c new file mode 100644 index 0000000000..f175c210b6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_gt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_gt_fp32 +// A'*B function: GB_AdotB__lor_gt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp64.c new file mode 100644 index 0000000000..91b74140da --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_gt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_gt_fp64 +// A'*B function: GB_AdotB__lor_gt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int16.c new file mode 100644 index 0000000000..e0ff9b7940 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_gt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_gt_int16 +// A'*B function: GB_AdotB__lor_gt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
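+    // The outer-product method below accumulates C one column at a time into
+    // the boolean workspace w; as a rough sketch of the loop structure:
+    //
+    //      for each k with B(k,j) present:
+    //          for each i with A(i,k) present:
+    //              w [i] = w [i] || (A (i,k) > B (k,j)) ;
+    //
+    // after which C(:,j) is gathered from w using the pattern of C(:,j).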
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int32.c new file mode 100644 index 0000000000..7fb24d9a83 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_gt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_gt_int32 +// A'*B function: GB_AdotB__lor_gt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
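+    // In the masked case below, Mask(:,j) is first scattered into the Flag
+    // workspace: Flag [i] > 0 means Mask(i,j)=1 with C(i,j) not yet started,
+    // and Flag [i] = -1 means C(i,j) already has a value in w.  With
+    // WITH_ZOMBIES, Mask entries that never appear in A*B are kept as zombies
+    // (row index FLIP (i)) so C retains the Mask pattern; otherwise those
+    // entries are simply not gathered into C(:,j).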
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int64.c new file mode 100644 index 0000000000..31bd062820 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_gt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_gt_int64 +// A'*B function: GB_AdotB__lor_gt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
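+    // When no Mask is given, the pattern of C has already been found by
+    // GB_AxB_symbolic, so the numeric phase below only clears w over the
+    // pattern of C(:,j), accumulates w [i] = w [i] || (A (i,k) > B (k,j)),
+    // and then gathers the values of C(:,j) back out of w.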
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int8.c new file mode 100644 index 0000000000..3882ca0740 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_gt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_gt_int8 +// A'*B function: GB_AdotB__lor_gt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
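+    // The companion GB_AdotB method later in this file computes C=A'*B with
+    // dot products, picking a strategy for each (i,j) pair: both vectors
+    // dense, one dense and one sparse, one more than 32 times sparser than
+    // the other (skipping ahead with GB_BINARY_TRIM_SEARCH), or a plain
+    // two-pointer merge when the sparsities are comparable.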
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint16.c
new file mode 100644
index 0000000000..617fbfd113
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint16.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_gt_uint16: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_gt_uint16
+// A'*B function: GB_AdotB__lor_gt_uint16
+// Z type: bool (the type of C)
+// XY type: uint16_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik > bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C<Mask>=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_gt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip       // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
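+    // Note: this is a saxpy-style outer-product method.  For each column j,
+    // w acts as a dense accumulator indexed by row: each entry B(k,j) is
+    // combined with the sparse column A(:,k) via the GT multiply, partial
+    // results are merged with the LOR monoid, and w is then gathered back
+    // into the pattern of C(:,j).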
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint32.c
new file mode 100644
index 0000000000..0b8c083ecb
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_gt_uint32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_gt_uint32
+// A'*B function: GB_AdotB__lor_gt_uint32
+// Z type: bool (the type of C)
+// XY type: uint32_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik > bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C<Mask>=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_gt_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip       // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
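+    // Note: in the masked phase below, the Flag workspace encodes three
+    // states per row i of the current column j: 0 means row i is not in the
+    // scattered Mask(:,j) pattern (the entry is skipped), a positive value
+    // means the Mask allows C(i,j) but no value has been computed yet, and
+    // -1 means w [i] already holds a partial result for C(i,j).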
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint64.c
new file mode 100644
index 0000000000..e6e3b5f2da
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_gt_uint64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_gt_uint64
+// A'*B function: GB_AdotB__lor_gt_uint64
+// Z type: bool (the type of C)
+// XY type: uint64_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik > bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C<Mask>=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_gt_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip       // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
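+    // Note: two cases follow.  If a Mask is present, C is computed directly
+    // in the pattern of the Mask; Mask entries that do not appear in A*B are
+    // either skipped or turned into zombies (row index FLIP'd and pruned
+    // later), depending on WITH_ZOMBIES.  Otherwise the pattern of C has
+    // already been computed by GB_AxB_symbolic and only its values are
+    // computed here.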
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint8.c
new file mode 100644
index 0000000000..aa74bb262e
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_gt_uint8.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_gt_uint8: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_gt_uint8
+// A'*B function: GB_AdotB__lor_gt_uint8
+// Z type: bool (the type of C)
+// XY type: uint8_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik > bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C<Mask>=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_gt_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip       // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
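+    // Illustrative example of this semiring (the values are hypothetical):
+    // pairing aik values 3, 0, 7 with bkj values 1, 5, 2, the GT multiply
+    // gives t = 1, 0, 1, and the LOR monoid reduces these to cij = true.
+    // The identity false leaves any cij unchanged under ||.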
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_land_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_land_bool.c
new file mode 100644
index 0000000000..eb82b62b87
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_land_bool.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_land_bool: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_land_bool
+// A'*B function: GB_AdotB__lor_land_bool
+// Z type: bool (the type of C)
+// XY type: bool (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = ((aik != 0) && (bkj != 0))
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C<Mask>=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_land_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip       // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
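+    // Note: an LAND multiply with an LOR monoid over bool is conventional
+    // Boolean matrix multiplication: a computed C(i,j) becomes true as soon
+    // as some k contributes both A(i,k) and B(k,j) nonzero.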
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ;
+            }
+        }
+    }
+    // log the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_bool.c
new file mode 100644
index 0000000000..5df6165d2f
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_bool.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_le_bool: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_le_bool
+// A'*B function: GB_AdotB__lor_le_bool
+// Z type : bool (the type of C)
+// XY type: bool (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_le_bool
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
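+
+    // Outline of the method below: for each column j, every entry B(k,j)
+    // selects a column A(:,k), whose entries are accumulated into the dense
+    // workspace w with the OR monoid (w [i] = w [i] || (aik <= bkj)); w is
+    // then gathered into C(:,j). Without a Mask, the pattern of C has already
+    // been computed by GB_AxB_symbolic. With a Mask, Mask(:,j) is first
+    // scattered into the Flag workspace so that only entries permitted by the
+    // Mask are computed, and (when WITH_ZOMBIES is defined) Mask entries not
+    // present in A*B are stored in C as zombies. The companion routine
+    // GB_AdotB__lor_le_bool below computes C=A'*B instead, with one dot
+    // product for each entry in the pattern of C, merging the sorted patterns
+    // of A(:,i) and B(:,j).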
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_fp32.c
new file mode 100644
index 0000000000..170520310c
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_fp32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_le_fp32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_le_fp32
+// A'*B function: GB_AdotB__lor_le_fp32
+// Z type : bool (the type of C)
+// XY type: float (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_le_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_fp64.c
new file mode 100644
index 0000000000..d0e1f4673b
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_fp64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_le_fp64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_le_fp64
+// A'*B function: GB_AdotB__lor_le_fp64
+// Z type : bool (the type of C)
+// XY type: double (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_le_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int16.c
new file mode 100644
index 0000000000..794ba59027
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int16.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_le_int16: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_le_int16
+// A'*B function: GB_AdotB__lor_le_int16
+// Z type : bool (the type of C)
+// XY type: int16_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_le_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int32.c
new file mode 100644
index 0000000000..e2bc5d9fd1
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_le_int32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_le_int32
+// A'*B function: GB_AdotB__lor_le_int32
+// Z type : bool (the type of C)
+// XY type: int32_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_le_int32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int64.c new file mode 100644 index 0000000000..078189cfba --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_le_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_le_int64 +// A'*B function: GB_AdotB__lor_le_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
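+ // As a concrete example of this semiring: if A(i,k1) = 5, A(i,k2) = 2,
+ // B(k1,j) = 3, and B(k2,j) = 4, the multiplies give t1 = (5 <= 3) = false
+ // and t2 = (2 <= 4) = true, and the LOR monoid combines them, so
+ // C(i,j) = (false || true) = true.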
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int8.c new file mode 100644 index 0000000000..237d05c1ad --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_le_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_le_int8 +// A'*B function: GB_AdotB__lor_le_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
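+ // w acts as a dense accumulator for one column C(:,j) at a time: entries
+ // A(i,k) combined with B(k,j) are scattered into w [i], and the column is
+ // then gathered back into Cx using either the Mask pattern (masked case)
+ // or the symbolic pattern in Cp and Ci (unmasked case).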
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint16.c new file mode 100644 index 0000000000..afe90be16b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_le_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_le_uint16 +// A'*B function: GB_AdotB__lor_le_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
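+ // In the masked phase below, Flag records the per-column state:
+ // Flag [i] == 0 means Mask(i,j) is zero and the entry is skipped,
+ // Flag [i] > 0 means Mask(i,j) is set but C(i,j) has not yet been
+ // computed, and Flag [i] == -1 means w [i] already holds a partial result.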
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint32.c new file mode 100644 index 0000000000..d78be1cd4e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_le_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_le_uint32 +// A'*B function: GB_AdotB__lor_le_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
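+ // The dot-product variant GB_AdotB__lor_le_uint32 later in this file
+ // picks one of five kernels per cij based on the sparsity of A(:,i) and
+ // B(:,j): both dense, one of the two dense, one far sparser than the
+ // other (advanced via GB_BINARY_TRIM_SEARCH), or a two-pointer merge
+ // when their sparsities are comparable.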
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint64.c new file mode 100644 index 0000000000..e8d0e1aea2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_le_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_le_uint64 +// A'*B function: GB_AdotB__lor_le_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
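+    // w is the gather/scatter workspace for this saxpy-style outer product:
+    // while column B(:,j) is scanned, w [i] accumulates C(i,j) with the OR
+    // monoid.  For illustration only: aik = 3 and bkj = 5 give
+    // t = (3 <= 5) = true, and the update is w [i] = (w [i] || t).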
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint8.c new file mode 100644 index 0000000000..4dd48edbb8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_le_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_le_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_le_uint8 +// A'*B function: GB_AdotB__lor_le_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
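+    // w is the workspace for the outer-product method: w [i] holds the
+    // boolean accumulator for C(i,j) while B(:,j) is processed.  As an
+    // illustration, aik = 7 and bkj = 2 give t = (7 <= 2) = false, which
+    // leaves w [i] unchanged under the OR monoid.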
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lor_bool.c new file mode 100644 index 0000000000..f3a6dc85bf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lor_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lor_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lor_bool +// A'*B function: GB_AdotB__lor_lor_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
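+    // w is the boolean workspace for the outer-product method, one entry per
+    // row of C.  As an illustration, aik = false and bkj = true give
+    // t = ((aik != 0) || (bkj != 0)) = true, and the OR monoid then sets
+    // w [i] = (w [i] || t) = true.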
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_bool.c new file mode 100644 index 0000000000..914004df0c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_bool +// A'*B function: GB_AdotB__lor_lt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
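+    // w is the gather/scatter workspace, holding the boolean accumulator for
+    // each row of C(:,j).  As an illustration, aik = false and bkj = true
+    // give t = (aik < bkj) = true, so the OR monoid sets w [i] to true.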
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp32.c new file mode 100644 index 0000000000..3783fa554c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_fp32 +// A'*B function: GB_AdotB__lor_lt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
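+    // w holds one boolean accumulator per row of C for the outer-product
+    // method.  For illustration only: aik = 1.5 and bkj = 0.5 give
+    // t = (1.5 < 0.5) = false, so w [i] = (w [i] || t) is unchanged.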
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp64.c new file mode 100644 index 0000000000..549a3c470e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_fp64 +// A'*B function: GB_AdotB__lor_lt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
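    // A usage sketch (not part of the generated template): this kernel computes
    // C(i,j) = OR over k of (A(i,k) < B(k,j)), i.e. the LOR monoid (identity
    // false) applied to the LT multiplier.  Assuming the built-in semiring name
    // GxB_LOR_LT_FP64 from GraphBLAS.h, a caller would reach it through the
    // generic API, roughly:
    //
    //      GrB_Matrix C, A, B ;
    //      // ... create A, B, and a boolean C of matching dimensions ...
    //      GrB_mxm (C, NULL, NULL, GxB_LOR_LT_FP64, A, B, NULL) ;
    //
    // When GBCOMPACT is not defined, the library can dispatch such a call to
    // this hard-coded worker (or to GB_AdotB__lor_lt_fp64 below) instead of the
    // generic, typecasting one.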
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int16.c new file mode 100644 index 0000000000..f96789fc53 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_int16 +// A'*B function: GB_AdotB__lor_lt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
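    // A sketch (not part of the generated template) of the masked path that
    // follows, for the branch where WITH_ZOMBIES is defined: C is given the
    // pattern of the Mask exactly.  Mask(:,j) is scattered into the int8_t Flag
    // workspace; Flag [i] > 0 means Mask(i,j) is present, and Flag [i] < 0 means
    // C(i,j) has also received at least one product.  Masked entries that never
    // receive a product are kept as zombies rather than removed:
    //
    //      Cx [p] = false ;            // value is the monoid identity
    //      Ci [p] = FLIP (i) ;         // flipped row index marks the zombie
    //      C->nzombies++ ;             // pruned later (see GB_queue_insert / GB_wait)
    //
    // so the numeric phase never compacts C; deleting zombies is deferred.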
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int32.c new file mode 100644 index 0000000000..f5edf09599 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_int32 +// A'*B function: GB_AdotB__lor_lt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
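    // A sketch (not part of the generated template) of the unmasked path that
    // follows: the pattern of C was computed beforehand by GB_AxB_symbolic, so
    // the numeric phase only fills in values.  The dense workspace w acts as a
    // one-column accumulator, roughly:
    //
    //      for each column j:
    //          w [Ci [p]] = false          for all p in C(:,j)     // LOR identity
    //          for each entry B(k,j):
    //              for each entry A(i,k):  w [i] = w [i] || (A(i,k) < B(k,j))
    //          Cx [p] = w [Ci [p]]         for all p in C(:,j)     // gather
    //
    // Clearing w only at the positions in C(:,j)'s pattern keeps the cost
    // proportional to the number of entries touched, not to the number of rows.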
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int64.c new file mode 100644 index 0000000000..77aca146d6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_int64 +// A'*B function: GB_AdotB__lor_lt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
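    // A sketch (not part of the generated template) of the dot-product variant,
    // GB_AdotB__lor_lt_int64, defined later in this file: each C(i,j) is the
    // reduction of (A(k,i) < B(k,j)) over k with the LOR monoid.  Per (i,j) the
    // kernel picks a merge strategy from the relative densities of A(:,i) and
    // B(:,j):
    //
    //      both columns dense          -> one flat loop over all nrows entries
    //      exactly one column dense    -> loop over the sparse column and index
    //                                     directly into the dense one
    //      one column > 32x sparser    -> two-pointer merge, skipping ahead in the
    //                                     denser column via GB_BINARY_TRIM_SEARCH
    //      comparable sparsity         -> plain two-pointer merge
    //
    // The factor of 32 is a fixed crossover between linear merging and repeated
    // binary search; the same constant appears in every generated type variant.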
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int8.c new file mode 100644 index 0000000000..b2bfcc030e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_int8 +// A'*B function: GB_AdotB__lor_lt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
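    // A note (not part of the generated template) on the MERGE macro used by the
    // dot-product variant later in this file: it fuses the LT multiply with the
    // LOR monoid update at each matching index k of the two-pointer merge,
    // roughly:
    //
    //      t   = (A(k,i) < B(k,j)) ;               // multiplier
    //      cij = cij_exists ? (cij || t) : t ;     // additive monoid, identity false
    //      cij_exists = true ;
    //
    // cij_exists distinguishes "no matching k at all", in which case C(i,j) is
    // not created, from an accumulated value of false, in which case C(i,j) is
    // created and stored as false; the identity value alone cannot make that
    // distinction.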
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint16.c
new file mode 100644
index 0000000000..c137511e52
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint16.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_lt_uint16: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__lor_lt_uint16
+// A'*B function: GB_AdotB__lor_lt_uint16
+// Z type:  bool (the type of C)
+// XY type: uint16_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik < bkj)
+// Add:      cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_lt_uint16
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize.  uninitialized.
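+    // In the masked branch below, the Flag workspace encodes three states per
+    // row index i of the current column j (an illustrative summary, not text
+    // produced by the axb*.m template): Flag [i] == 0 means Mask(i,j) is not
+    // set, so any A(i,k)*B(k,j) contribution is skipped; Flag [i] > 0 (set by
+    // scatter_mask) means Mask(i,j) is set but no contribution has been seen
+    // yet; Flag [i] == -1 means C(i,j) already holds a partial result in w [i].
+    // The gather step at the end of each column resets Flag [i] back to 0.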
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
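+    // A note on the zombie variant (illustrative summary): when WITH_ZOMBIES is
+    // defined, C keeps the full pattern of the Mask, and mask entries that
+    // received no contribution from A*B are stored as zombies, their row index
+    // mapped to a negative value via FLIP (i) and C->nzombies incremented.
+    // GB_queue_insert then records C on the global list of matrices with
+    // pending work, so the zombies can be deleted in a later cleanup pass.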
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint32.c new file mode 100644 index 0000000000..ac35907021 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_uint32 +// A'*B function: GB_AdotB__lor_lt_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
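+    // All of A, B, C, and the Mask are held in compressed sparse column (CSC)
+    // form: for a matrix X, Xp holds ncols+1 column pointers, Xi the row
+    // indices of each column in ascending order, and Xx the values, so column
+    // j occupies positions Xp [j] ... Xp [j+1]-1.  For example (illustration
+    // only), the 3-by-2 matrix
+    //
+    //      [ 4 . ]
+    //      [ . 7 ]
+    //      [ 1 . ]
+    //
+    // has Xp = { 0, 2, 3 }, Xi = { 0, 2, 1 }, and Xx = { 4, 1, 7 }.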
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
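+    // (Illustrative summary.)  The unmasked case is handled by the else branch
+    // further below: there the pattern of C has already been computed by
+    // GB_AxB_symbolic, so this numeric phase only clears w over the known
+    // pattern of C(:,j), accumulates A*B(:,j) into w, and gathers w back into
+    // Cx for that column.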
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint64.c new file mode 100644 index 0000000000..0003f4a69b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_uint64 +// A'*B function: GB_AdotB__lor_lt_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
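+    // (Illustrative summary, not produced by the axb*.m template.)  The second
+    // kernel in this file, GB_AdotB below, computes C(i,j) = A(:,i)'*B(:,j) as
+    // dot products, dispatching on the density of the two columns: both dense,
+    // one dense and one sparse, one column more than 32 times sparser than the
+    // other (handled with GB_BINARY_TRIM_SEARCH to skip runs of entries), or
+    // comparable sparsity (a plain two-pointer merge).  A hypothetical
+    // standalone sketch of that last merge pattern:
+    #if 0
+    int64_t I [3] = { 0, 2, 5 }, J [4] = { 2, 3, 5, 7 } ;  // sorted row indices
+    int64_t p = 0, q = 0 ;
+    while (p < 3 && q < 4)
+    {
+        if      (I [p] < J [q]) p++ ;       // I [p] has no match in J
+        else if (J [q] < I [p]) q++ ;       // J [q] has no match in I
+        else { p++ ; q++ ; }                // match: multiply and accumulate
+    }
+    #endif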
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
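+    // (Illustrative summary.)  The two compile-time variants of the masked case
+    // differ only in how the result is stored: with WITH_ZOMBIES defined, C->p
+    // is copied from Maskp up front and unmatched mask entries become zombies;
+    // otherwise Cp [j] and cnz are built as the columns are computed, so C
+    // contains only live entries and no zombies are created.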
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint8.c new file mode 100644 index 0000000000..0e32970990 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_lt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_lt_uint8 +// A'*B function: GB_AdotB__lor_lt_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
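+    // (Illustrative observation.)  Because the additive monoid here is logical
+    // OR, true is an absorbing value: once w [i] or cij becomes true, no later
+    // term can change it.  This generated kernel does not exploit that, but in
+    // principle the inner accumulation loops could stop early once the
+    // accumulator saturates.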
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column
+ Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_lxor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_lxor_bool.c
new file mode 100644
index 0000000000..7da8f89ace
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_lxor_bool.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_lxor_bool: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_lxor_bool
+// A'*B function: GB_AdotB__lor_lxor_bool
+// Z type : bool (the type of C)
+// XY type: bool (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = ((aik != 0) != (bkj != 0))
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_lxor_bool
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ;
+ }
+ }
+ }
+ // log the end of the last column
+ Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp32.c
new file mode 100644
index 0000000000..27a288f61d
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_ne_fp32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_ne_fp32
+// A'*B function: GB_AdotB__lor_ne_fp32
+// Z type : bool (the type of C)
+// XY type: float (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik != bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_ne_fp32
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp64.c
new file mode 100644
index 0000000000..2ad3beba8d
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_fp64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_ne_fp64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_ne_fp64
+// A'*B function: GB_AdotB__lor_ne_fp64
+// Z type : bool (the type of C)
+// XY type: double (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik != bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_ne_fp64
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int16.c
new file mode 100644
index 0000000000..f07091c3ac
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int16.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_ne_int16: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_ne_int16
+// A'*B function: GB_AdotB__lor_ne_int16
+// Z type : bool (the type of C)
+// XY type: int16_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik != bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_ne_int16
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column
+ Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int32.c
new file mode 100644
index 0000000000..2c4ecc57a1
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__lor_ne_int32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__lor_ne_int32
+// A'*B function: GB_AdotB__lor_ne_int32
+// Z type : bool (the type of C)
+// XY type: int32_t (the type of A and B)
+// Identity: false (where cij = (cij || false) does not change cij)
+// Multiply: t = (aik != bkj)
+// Add: cij = (cij || t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__lor_ne_int32
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int64.c new file mode 100644 index 0000000000..56dc1f4a3e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ne_int64 +// A'*B function: GB_AdotB__lor_ne_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
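+    // w is a dense bool accumulator with one entry per row of C.  For this
+    // LOR_NE semiring, scanning B(:,j) updates it as
+    //     w [i] = w [i] || (A (i,k) != B (k,j)) ,
+    // and either Mask(:,j) or the precomputed pattern of C(:,j) selects
+    // which entries of w are gathered back into C.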
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int8.c new file mode 100644 index 0000000000..357fa25040 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ne_int8 +// A'*B function: GB_AdotB__lor_ne_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint16.c new file mode 100644 index 0000000000..a154f487ce --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ne_uint16 +// A'*B function: GB_AdotB__lor_ne_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
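+    // When a Mask is present, the int8_t Flag workspace records the state
+    // of each row i within the current column j:  Flag [i] == 0 means
+    // Mask(i,j) is not present (the update is skipped), Flag [i] > 0 means
+    // Mask(i,j) is present but C(i,j) has not been computed yet, and
+    // Flag [i] < 0 means w [i] already holds a partial C(i,j) to OR with.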
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint32.c new file mode 100644 index 0000000000..e4b650b524 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ne_uint32 +// A'*B function: GB_AdotB__lor_ne_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint64.c new file mode 100644 index 0000000000..a137c3e216 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ne_uint64 +// A'*B function: GB_AdotB__lor_ne_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
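+    // With WITH_ZOMBIES defined, C adopts the pattern of the Mask directly
+    // (Maskp is copied into C->p); Mask entries that receive no
+    // contribution from A*B become zombies, marked with a flipped row index
+    // FLIP (i) and counted in C->nzombies for later deletion.  Otherwise
+    // C(:,j) is built compacted, with Cp [j] logged as each column starts
+    // and Cp [n] = cnz recorded at the end.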
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint8.c new file mode 100644 index 0000000000..84ea4c23c3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_ne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_ne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_ne_uint8 +// A'*B function: GB_AdotB__lor_ne_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
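+    // The outer-product method below builds one column C(:,j) at a time in the
+    // dense workspace w: for each entry B(k,j), the column A(:,k) is combined
+    // with B(k,j) via the NE multiplier and OR-ed into w.  The pattern of
+    // C(:,j) either comes from the Mask (with zombies for Mask entries not in
+    // A*B) or was computed beforehand by GB_AxB_symbolic; w is then gathered
+    // into Cx at the end of each column.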
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lor_second_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lor_second_bool.c new file mode 100644 index 0000000000..7518f1d861 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lor_second_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lor_second_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lor_second_bool +// A'*B function: GB_AdotB__lor_second_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij || false) does not change cij) +// Multiply: t = (bkj) +// Add: cij = (cij || t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
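+    // With the SECOND multiplier, t = B(k,j) ignores the value of A(i,k), so A
+    // contributes only its pattern in this variant; the local variable aik is
+    // loaded below but never used.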
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] || t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = bkj ; + w [i] = (w [i] || t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij || t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + 
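+            // The cases below choose a merge strategy for A(:,i)'*B(:,j):
+            // fully dense vectors use direct indexing, a dense/sparse pair
+            // walks only the sparse one, a 32x imbalance in nonzero counts
+            // switches to a trimmed binary search (GB_BINARY_TRIM_SEARCH)
+            // over the denser vector, and otherwise a two-pointer merge of
+            // the two sorted index lists is used.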
+ // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij || t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij || t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij || t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_bool.c new file mode 100644 index 0000000000..6aa567761b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_bool +// A'*B function: GB_AdotB__lxor_eq_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
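+    // The monoid here is exclusive-or: w [i] = (w [i] != t), so an entry of
+    // C(:,j) ends up true exactly when an odd number of its terms
+    // t = (aik == bkj) are true.  For example, t values {true, false, true}
+    // fold to ((true != false) != true) = false, since two of them are true.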
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp32.c new file mode 100644 index 0000000000..63c7112df7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_fp32 +// A'*B function: GB_AdotB__lxor_eq_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
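+    // The EQ multiplier on fp32 is the exact C comparison (aik == bkj), with
+    // the usual floating-point caveats (NaN compares unequal to everything,
+    // while +0.0f equals -0.0f); the resulting t is a bool, matching the bool
+    // type of C.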
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
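+// Illustrative usage sketch (an assumption, not part of the generated
+// template): a hard-coded pair such as GB_AxB__lxor_eq_fp32 and
+// GB_AdotB__lxor_eq_fp32 would typically be reached through GrB_mxm with the
+// corresponding built-in semiring, for example:
+//
+//      GrB_Matrix C, A, B ;
+//      // ... create C, A, and B, and fill A and B with float values ...
+//      GrB_mxm (C, NULL, NULL, GxB_LXOR_EQ_FP32, A, B, NULL) ;
+//
+// where GxB_LXOR_EQ_FP32 is assumed to name the (lxor, eq, fp32) semiring
+// implemented above.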
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp64.c new file mode 100644 index 0000000000..3d283d056a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_fp64 +// A'*B function: GB_AdotB__lxor_eq_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
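+    // w acts as a dense accumulator for the LXOR monoid: w [i] holds the
+    // running parity of matches (aik == bkj) for C(i,j) within column j.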
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int16.c new file mode 100644 index 0000000000..d54f3adc8d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_int16 +// A'*B function: GB_AdotB__lxor_eq_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int32.c new file mode 100644 index 0000000000..57f269635d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_int32 +// A'*B function: GB_AdotB__lxor_eq_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int64.c new file mode 100644 index 0000000000..c5ac120b04 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_int64 +// A'*B function: GB_AdotB__lxor_eq_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int8.c new file mode 100644 index 0000000000..c33d4f642b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_int8 +// A'*B function: GB_AdotB__lxor_eq_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint16.c new file mode 100644 index 0000000000..591bc72cde --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_uint16 +// A'*B function: GB_AdotB__lxor_eq_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
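+
+    // Illustration (explanatory comment only, not part of the generated
+    // kernel): under this semiring the "multiply" is an equality test and
+    // the "add" is an exclusive-or, so C(i,j) ends up true exactly when an
+    // odd number of indices k have A(i,k) == B(k,j).  A minimal scalar
+    // sketch, assuming hypothetical dense uint16_t vectors a [ ] and b [ ]
+    // of length len:
+    //
+    //      bool cij = false ;                  // identity of the LXOR monoid
+    //      for (int64_t k = 0 ; k < len ; k++)
+    //      {
+    //          bool t = (a [k] == b [k]) ;     // multiply: EQ
+    //          cij = (cij != t) ;              // add: LXOR (parity)
+    //      }
+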
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
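+
+    // Note on the case analysis above (explanatory comment only): when one
+    // column has more than 32 times the entries of the other, a plain
+    // two-pointer merge would spend nearly all its time walking the denser
+    // column.  For example, with ainz = 10000 and bjnz = 3, the trimmed
+    // binary search costs roughly 3 * log2(10000), about 40 probes of Ai,
+    // instead of up to ainz + bjnz = 10003 steps for the linear merge.  The
+    // factor 32 is the heuristic used above to switch between the two.
+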
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint32.c new file mode 100644 index 0000000000..3649aee704 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_uint32 +// A'*B function: GB_AdotB__lxor_eq_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
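+
+    // Masked-branch bookkeeping (explanatory comment only): in the Mask
+    // branch below, Flag [i] acts as a small per-row state for the current
+    // column j:
+    //
+    //      Flag [i] ==  0    Mask(i,j) not present: skip this row
+    //      Flag [i]  >  0    Mask(i,j) allows C(i,j), but no product yet
+    //      Flag [i] == -1    w [i] already holds a partial sum for C(i,j)
+    //
+    // The gather step reads w [i] for rows with Flag [i] < 0 and resets
+    // every visited Flag entry to 0 before the next column.
+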
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
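+
+    // Note on the special cases above (explanatory comment only): when
+    // ainz == nrows or bjnz == nrows the corresponding column is dense, so
+    // its k-th entry can be addressed directly as Ax [pa+k] or Bx [pb+k]
+    // and the pattern merge is skipped entirely; the dot product then costs
+    // O(min(ainz,bjnz)) instead of O(ainz+bjnz).
+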
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint64.c new file mode 100644 index 0000000000..28001b4464 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_uint64 +// A'*B function: GB_AdotB__lxor_eq_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
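+
+    // Zombie handling (explanatory comment only): when WITH_ZOMBIES is
+    // defined, C->p is copied straight from the Mask pattern, so every
+    // Mask(i,j) gets a slot in C.  Slots that receive no product become
+    // "zombies": their value is set to false and their row index is stored
+    // as FLIP (i), a flipped (negative) encoding that marks the entry for
+    // later deletion.  C->nzombies counts them, and GB_queue_insert (C)
+    // enqueues the matrix so the zombies can be cleared later.
+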
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
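+
+    // Note on the MERGE macro used above (explanatory comment only): a match
+    // ia == ib advances both pa and pb and folds the product into cij.  The
+    // cij_exists flag makes the entry lazy: C(i,j) is appended to Ci and Cx
+    // only if at least one index k is found in both A(:,i) and B(:,j).
+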
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint8.c new file mode 100644 index 0000000000..a9198425f1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_eq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_eq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_eq_uint8 +// A'*B function: GB_AdotB__lxor_eq_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
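+
+    // Unmasked-branch sketch (explanatory comment only): when Mask is NULL
+    // the pattern of C has already been computed by GB_AxB_symbolic, so for
+    // each column j the workspace w is cleared only at the positions listed
+    // in Ci, updated by a saxpy-style pass over B(:,j) and the matching
+    // columns of A, and then gathered back into Cx in pattern order.
+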
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik == bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki == bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_first_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_first_bool.c new file mode 100644 index 0000000000..7c7fb87883 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_first_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_first_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_first_bool +// A'*B function: GB_AdotB__lxor_first_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
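+
+    // Semiring note (explanatory comment only): with the FIRST multiply the
+    // product is just t = aik, so the value of B(k,j) is read but not used;
+    // only the pattern of B matters.  C(i,j) is therefore the LXOR (parity)
+    // of the values A(i,k) over the indices k shared by A(i,:) and B(:,j).
+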
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + 
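+ // Note: at this point jinit appears to have set up column B(:,j)
+ // (pb_start..pb_end, with bjnz entries) and cij_init to have selected a
+ // candidate row i, restricted to the Mask when one is present, with
+ // pa..pa_end spanning A(:,i) and pb..pb_end spanning the remainder of
+ // B(:,j).  The MERGE macro defined above performs one scalar step of the
+ // dot product: the first matching pair sets cij and cij_exists, and
+ // later pairs fold in via cij = (cij != t).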
+ // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_bool.c new file mode 100644 index 0000000000..2df57b08e4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_bool +// A'*B function: GB_AdotB__lxor_ge_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
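+ // Note: with boolean operands the multiply t = (aik >= bkj) is logical
+ // implication (bkj implies aik), i.e. t = (aik || !bkj); each such t is
+ // then folded into the LXOR monoid below as w [i] = (w [i] != t).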
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp32.c new file mode 100644 index 0000000000..71cbe5111d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_fp32 +// A'*B function: GB_AdotB__lxor_ge_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
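+ // Note: the inputs here are float but C is bool; each product is the
+ // comparison t = (aik >= bkj).  Any comparison involving a NaN is false,
+ // and false is the identity of the LXOR monoid, so NaN entries leave the
+ // accumulated result unchanged.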
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
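+// Note: the dot-product kernel above chooses a strategy per entry C(i,j)
+// based on the sparsity of A(:,i) and B(:,j): fully dense vectors are
+// handled with direct indexing, and sparse vectors use a two-pointer
+// merge.  When one vector has more than 32 times the entries of the
+// other, the merge advances through the denser vector with
+// GB_BINARY_TRIM_SEARCH rather than stepping linearly, keeping the work
+// roughly proportional to the shorter vector times a logarithmic factor.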
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp64.c new file mode 100644 index 0000000000..92053f1856 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_fp64 +// A'*B function: GB_AdotB__lxor_ge_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int16.c new file mode 100644 index 0000000000..4d9db48d70 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename contains a double underscore ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_int16 +// A'*B function: GB_AdotB__lxor_ge_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
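+ // Note: in the masked phase below, Flag holds the scattered pattern of
+ // Mask(:,j): Flag [i] == 0 means Mask(i,j) is not present (or is false),
+ // so the product is skipped; Flag [i] > 0 means Mask(i,j) is true but
+ // C(i,j) has no value yet; and Flag [i] == -1 means w [i] already holds
+ // a partial C(i,j).  When WITH_ZOMBIES is defined, mask entries that
+ // never receive a value are kept as zombies, with their row index stored
+ // as FLIP (i).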
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int32.c new file mode 100644 index 0000000000..bca1305ef2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_int32 +// A'*B function: GB_AdotB__lxor_ge_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
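The semiring named in the header above pairs the LXOR monoid (identity false) with the GE comparison as the multiplier. The helper below is an illustrative sketch only, not part of the generated kernel; the name lxor_ge_dot is hypothetical and the inputs are assumed to be plain dense int32_t arrays of length n. It shows the scalar reduction that every kernel in this file performs:

    static bool lxor_ge_dot (const int32_t *a, const int32_t *b, int64_t n)
    {
        bool cij = false ;                  // identity of the LXOR monoid
        for (int64_t k = 0 ; k < n ; k++)
        {
            bool t = (a [k] >= b [k]) ;     // multiply: t = (aik >= bkj)
            cij = (cij != t) ;              // add: cij = cij XOR t
        }
        return (cij) ;
    }

The sparse kernels in this file do the same reduction, either column-at-a-time into the workspace w (outer product) or over the intersection of two column patterns (dot product).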
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int64.c new file mode 100644 index 0000000000..02b9c4c19a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_int64 +// A'*B function: GB_AdotB__lxor_ge_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
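In the masked branch of the outer-product kernel below, the int8_t Flag workspace encodes three states per row, and the inner loop updates the workspace w only where the mask allows it. The helper below is an illustrative sketch only; accumulate_masked is a hypothetical name that isolates the per-product update the kernel performs inline, with t already holding the boolean multiply result (aik >= bkj):

    // Flag [i] ==  0 : Mask(i,j) is zero, so C(i,j) is never computed
    // Flag [i] ==  1 : Mask(i,j) is nonzero, but no product has reached row i yet
    // Flag [i] == -1 : C(i,j) exists; w [i] holds its running LXOR value
    static void accumulate_masked (int8_t *Flag, bool *w, int64_t i, bool t)
    {
        int8_t flag = Flag [i] ;
        if (flag == 0) return ;             // row i is outside Mask(:,j): skip
        if (flag > 0)
        {
            Flag [i] = -1 ;                 // first product for C(i,j)
            w [i] = t ;
        }
        else
        {
            w [i] = (w [i] != t) ;          // LXOR monoid update
        }
    }

scatter_mask marks the pattern of Mask(:,j) in Flag once per column, and the gather phase clears Flag back to zero so the workspace can be reused for the next column.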
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int8.c new file mode 100644 index 0000000000..69b0c2c4f4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_int8 +// A'*B function: GB_AdotB__lxor_ge_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
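In the WITH_ZOMBIES variant of the masked gather below, an entry that is in the Mask pattern but received no product from A*B is kept as a zombie: its row index is stored flipped, C->nzombies is incremented, and GB_queue_insert puts C in the queue so the zombies can be pruned later by GB_wait. The two macros below are an illustrative sketch only; the exact encoding is an assumption (the real FLIP macro lives in GB.h), but any self-inverse mapping of nonnegative row indices to negative values behaves the same way:

    #define SKETCH_FLIP(i)       (-(i)-2)   // e.g. row 5 becomes -7, and -7 flips back to 5
    #define SKETCH_IS_ZOMBIE(i)  ((i) < 0)
    // SKETCH_FLIP is its own inverse, so a zombie's original row index can
    // always be recovered when the pending deletions are finally applied.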
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint16.c new file mode 100644 index 0000000000..6ec6be3679 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_uint16 +// A'*B function: GB_AdotB__lxor_ge_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
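The GB_AdotB__* kernels above intersect the row patterns of A(:,i) and B(:,j) with a two-pointer merge; when one pattern is more than 32 times denser than the other, the merge skips ahead in the denser list with a binary search (GB_BINARY_TRIM_SEARCH) instead of stepping one entry at a time. The function below is an illustrative sketch only; gallop_to is a hypothetical stand-in, not the library macro, and returns the first position whose row index reaches the target:

    static int64_t gallop_to (int64_t target, const int64_t *I,
        int64_t pleft, int64_t pright)
    {
        // binary search in the sorted index list I [pleft .. pright-1]
        while (pleft < pright)
        {
            int64_t pmid = pleft + (pright - pleft) / 2 ;
            if (I [pmid] < target) pleft = pmid + 1 ;
            else pright = pmid ;
        }
        return (pleft) ;    // first p with I [p] >= target, or pright if none
    }

With this, intersecting a very sparse B(:,j) against a long A(:,i) costs roughly bjnz * log(ainz) index comparisons instead of ainz.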
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint32.c new file mode 100644 index 0000000000..20a46dae8b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_uint32 +// A'*B function: GB_AdotB__lxor_ge_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
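For context on how these workers are reached: when GBCOMPACT is not defined, GB_AxB_builtin recognizes a built-in semiring and calls the matching hard-coded kernel instead of the generic path. The fragment below is an illustrative sketch only; it assumes the predefined GxB_LXOR_GE_UINT32 semiring, existing GrB_UINT32 matrices A and B, an optional Mask, and hypothetical dimension variables A_nrows and B_ncols:

    // C<Mask> = A*B over the (LXOR, >=, uint32) semiring; no accumulator,
    // default descriptor.  Internally this can reach GB_AxB__lxor_ge_uint32
    // (outer product) or GB_AdotB__lxor_ge_uint32 (dot product).
    GrB_Matrix C = NULL ;
    GrB_Matrix_new (&C, GrB_BOOL, A_nrows, B_ncols) ;   // C holds the Z type, bool
    GrB_mxm (C, Mask, NULL, GxB_LXOR_GE_UINT32, A, B, NULL) ;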
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint64.c new file mode 100644 index 0000000000..22216b0bff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_uint64 +// A'*B function: GB_AdotB__lxor_ge_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint8.c new file mode 100644 index 0000000000..32f7d798ad --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ge_uint8 +// A'*B function: GB_AdotB__lxor_ge_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik >= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki >= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_bool.c new file mode 100644 index 0000000000..49224e92ee --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_bool +// A'*B function: GB_AdotB__lxor_gt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp32.c new file mode 100644 index 0000000000..353bf0f0c5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_fp32 +// A'*B function: GB_AdotB__lxor_gt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp64.c new file mode 100644 index 0000000000..b2bdbdda9e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_fp64 +// A'*B function: GB_AdotB__lxor_gt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp 
[n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int16.c new file mode 100644 index 0000000000..3295402cff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_int16 +// A'*B function: GB_AdotB__lxor_gt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
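// [Editorial illustration -- not part of the generated file or the patch.]
// The kernels in this file follow the classic Gustavson saxpy pattern, one
// column of C at a time: for each entry B(k,j), the column A(:,k) is scanned
// and accumulated into a dense workspace w, and C(:,j) is then gathered from
// w over a pattern that is already known.  The sketch below condenses that
// idea for a single column j, using plain CSC arrays and the boolean LXOR_GT
// semiring; the function name is hypothetical, and it assumes w[] is false on
// the pattern of C(:,j) before the call.

#include <stdbool.h>
#include <stdint.h>

static void saxpy_column_sketch
(
    bool *w,                            // dense workspace, one entry per row
    const int64_t *Ap, const int64_t *Ai, const int16_t *Ax,   // A in CSC form
    const int64_t *Bp, const int64_t *Bi, const int16_t *Bx,   // B in CSC form
    int64_t j                           // column of B (and of C) to compute
)
{
    for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    {
        int64_t k = Bi [p] ;            // B(k,j) is present
        int16_t bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;       // A(i,k) is present
            bool t = (Ax [pa] > bkj) ;  // multiply: A(i,k) > B(k,j)
            w [i] = (w [i] != t) ;      // accumulate into w(i) with LXOR
        }
    }
    // the caller then gathers C(:,j) from w over the pattern of C(:,j)
}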
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int32.c new file mode 100644 index 0000000000..1956ef8ce3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_int32 +// A'*B function: GB_AdotB__lxor_gt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
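// [Editorial illustration -- not part of the generated file or the patch.]
// In the masked kernel below, a small per-row state machine drives the work:
// Flag [i] == 0 means Mask(i,j) is not present (skip the row), a positive
// value means the mask entry exists but C(i,j) has not been touched yet, and
// -1 means w [i] already holds a partial result for C(i,j).  The helper below
// isolates that one update step; the name is hypothetical and the routine is
// illustration only.

#include <stdbool.h>
#include <stdint.h>

static inline void masked_update_sketch
(
    int8_t *Flag,       // Flag [i]: 0 = not in mask, 1 = in mask, -1 = computed
    bool   *w,          // dense workspace holding partial values of C(:,j)
    int64_t i,          // row index of the entry A(i,k) being processed
    bool    t           // t = (A(i,k) > B(k,j)), the semiring "multiply"
)
{
    int8_t f = Flag [i] ;
    if (f == 0)
    {
        return ;                        // Mask(i,j) not present: do nothing
    }
    else if (f > 0)
    {
        Flag [i] = -1 ;                 // first contribution to C(i,j)
        w [i] = t ;
    }
    else
    {
        w [i] = (w [i] != t) ;          // later contribution: LXOR-accumulate
    }
}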
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int64.c new file mode 100644 index 0000000000..1ebac11fbc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_int64 +// A'*B function: GB_AdotB__lxor_gt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
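// [Editorial illustration -- not part of the generated file or the patch.]
// The dot-product kernel GB_AdotB__lxor_gt_int64 below intersects two sorted
// index lists (the patterns of A(:,i) and B(:,j)) with a two-pointer merge:
// advance whichever pointer holds the smaller row index, and apply the
// semiring only when the indices match (the MERGE macro in the real code).
// A condensed, self-contained version of that loop follows; the name is
// hypothetical.  Starting cij at the identity false gives the same result as
// the macro's separate first-entry case, because false is the LXOR identity.

#include <stdbool.h>
#include <stdint.h>

static bool dot_merge_sketch
(
    const int64_t *Ai, const int64_t *Ax, int64_t pa, int64_t pa_end,  // A(:,i)
    const int64_t *Bi, const int64_t *Bx, int64_t pb, int64_t pb_end,  // B(:,j)
    bool *cij_exists                    // set true iff the patterns intersect
)
{
    bool cij = false ;                  // identity of the LXOR monoid
    (*cij_exists) = false ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa] ;
        int64_t ib = Bi [pb] ;
        if (ia < ib)
        {
            pa++ ;                      // A(ia,i) has no matching B entry
        }
        else if (ib < ia)
        {
            pb++ ;                      // B(ib,j) has no matching A entry
        }
        else                            // ia == ib == k: a matching pair
        {
            bool t = (Ax [pa++] > Bx [pb++]) ;  // multiply A(k,i) with B(k,j)
            cij = (cij != t) ;                  // add with LXOR
            (*cij_exists) = true ;
        }
    }
    return (cij) ;
}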
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int8.c new file mode 100644 index 0000000000..83d2d03fc9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_int8 +// A'*B function: GB_AdotB__lxor_gt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
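// [Editorial illustration -- not part of the generated file or the patch.]
// When one of the two patterns in the dot product is more than 32 times
// denser than the other, the kernels below stop stepping one entry at a time
// and instead jump ahead with a trimmed binary search (GB_BINARY_TRIM_SEARCH
// in the real code).  The helper below is a plain stand-in for that idea, not
// the library's macro: it returns the first position p in the sorted range
// [lo,hi) with X [p] >= target, so the caller can discard every smaller index
// in one step.

#include <stdint.h>

static int64_t lower_bound_sketch
(
    const int64_t *X,   // sorted list of row indices
    int64_t lo,         // search the half-open range [lo,hi)
    int64_t hi,
    int64_t target      // row index to catch up to
)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (X [mid] < target)
        {
            lo = mid + 1 ;              // X [lo..mid] are all too small
        }
        else
        {
            hi = mid ;                  // X [mid] is still a candidate
        }
    }
    return (lo) ;                       // first position with X [lo] >= target
}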
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp 
[n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint16.c new file mode 100644 index 0000000000..87eb7d986d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_uint16 +// A'*B function: GB_AdotB__lxor_gt_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
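+ // w is used as a dense accumulator for one column C(:,j) at a time.  With
+ // the LXOR monoid (identity false, add cij = (cij != t)), w [i] ends up true
+ // exactly when an odd number of the products t = (A(i,k) > B(k,j)) are true
+ // over the entries k present in B(:,j); for instance, products
+ // (true, true, false) accumulate to false.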
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint32.c new file mode 100644 index 0000000000..d2b646cd44 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_uint32 +// A'*B function: GB_AdotB__lxor_gt_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint64.c new file mode 100644 index 0000000000..ff68d6aa36 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_uint64 +// A'*B function: GB_AdotB__lxor_gt_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint8.c new file mode 100644 index 0000000000..4f32c97a9b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_gt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_gt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_gt_uint8 +// A'*B function: GB_AdotB__lxor_gt_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik > bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki > bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_land_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_land_bool.c new file mode 100644 index 0000000000..9bf64b0566 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_land_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_land_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_land_bool +// A'*B function: GB_AdotB__lxor_land_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
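+ // w is the dense accumulator for one column C(:,j).  For this boolean
+ // semiring (multiply: aik && bkj, add: lxor), w [i] records the parity of
+ // the number of indices k for which A(i,k) and B(k,j) are both true, so
+ // products (true, false, true) accumulate to false.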
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) && (bkj != 0) ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) && (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_bool.c new file mode 100644 index 0000000000..62cb908b4b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_bool +// A'*B function: GB_AdotB__lxor_le_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
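+    // Informal sketch of this semiring on scalars (illustrative values, not
+    // taken from any particular matrix): for bool inputs the multiply
+    // aik <= bkj is the same as (!aik || bkj), and the add is an
+    // exclusive-or whose identity is false, since (cij != false) == cij.
+    // Reducing the pairs (true,false), (false,true), (true,true) gives
+    // t = {false, true, true}, and false XOR true XOR true == false.
+    // The workspace w declared next accumulates these XORs for one column
+    // of C at a time.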
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp32.c new file mode 100644 index 0000000000..10a9d72fec --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_fp32 +// A'*B function: GB_AdotB__lxor_le_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
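+    // Note on types in this variant: Ax and Bx hold float values, but each
+    // multiply aik <= bkj yields a bool, so the accumulator w and the result
+    // Cx are boolean.  As an illustrative example (values not taken from any
+    // particular matrix), the pairs (1.0,2.0) and (5.0,4.0) give
+    // t = {true, false}, and true XOR false == true for that entry of C.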
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
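+
+// Summary of the dot-product kernel above: for each pair of columns A(:,i)
+// and B(:,j) it selects one of a few merge strategies (both columns dense,
+// one column dense, one column more than 32 times sparser than the other,
+// where GB_BINARY_TRIM_SEARCH skips ahead in the denser column, or a plain
+// two-pointer merge), and the MERGE macro folds each matched pair into cij
+// with the XOR monoid.  An entry is appended to Cx and Ci only when
+// cij_exists is set, that is, when the two columns intersect at least once.
+
+#if 0
+// Illustrative sketch only (never compiled): the scalar recurrence that the
+// dense-dense case and the MERGE macro implement for this semiring.
+static bool lxor_le_fp32_dot_sketch (const float *a, const float *b, int64_t n)
+{
+    bool cij = false ;                  // identity of the XOR monoid
+    for (int64_t k = 0 ; k < n ; k++)
+    {
+        bool t = (a [k] <= b [k]) ;     // multiply: t = (aki <= bkj)
+        cij = (cij != t) ;              // add: cij = cij XOR t
+    }
+    return (cij) ;
+}
+#endif
+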
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp64.c new file mode 100644 index 0000000000..23c900a8ca --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_fp64 +// A'*B function: GB_AdotB__lxor_le_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
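+    // In the unmasked branch below, the pattern of C has already been built
+    // by GB_AxB_symbolic, so for each column j the code clears w over the
+    // rows listed in Ci [Cp [j] .. Cp [j+1]-1], accumulates
+    // w [i] = w [i] XOR (A(i,k) <= B(k,j)) over the entries of B(:,j), and
+    // then gathers Cx [p] = w [Ci [p]]: informally, a saxpy-style sweep with
+    // w as a dense boolean accumulator, one column of C at a time.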
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int16.c new file mode 100644 index 0000000000..83649ed98d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_int16 +// A'*B function: GB_AdotB__lxor_le_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
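+    // Note on the masked branch below: when WITH_ZOMBIES is defined, C takes
+    // its pattern directly from the Mask (C->p is copied from Maskp), and any
+    // Mask(i,j) position that receives no contribution from A*B is kept as a
+    // "zombie": its value is set to false, its row index is stored as
+    // FLIP (i), and C->nzombies is incremented, leaving the entry to be
+    // pruned later outside this kernel.  Without WITH_ZOMBIES, only live
+    // entries are appended and Cp is rebuilt from cnz instead.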
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int32.c new file mode 100644 index 0000000000..a217ebdf10 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_int32 +// A'*B function: GB_AdotB__lxor_le_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
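+    // Note on the Flag workspace used below: scatter_mask appears to mark
+    // Flag [i] for the rows present in Mask(:,j), so in the innermost loop a
+    // flag of 0 means the row is not in the mask and the product is skipped,
+    // a positive flag means this is the first contribution to C(i,j) (w [i]
+    // is set and the flag is turned to -1), and a negative flag means later
+    // contributions are XORed into w [i].  The gather loops then reset the
+    // touched Flag entries to 0 for the next column.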
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int64.c new file mode 100644 index 0000000000..0956436170 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_int64 +// A'*B function: GB_AdotB__lxor_le_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
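A short standalone illustration of the semiring named above, assuming only C99 with <stdbool.h> and <stdint.h>; the helper name lxor_le_dot is hypothetical and is not part of the generated file. With Multiply t = (aik <= bkj), Add cij = (cij != t), and identity false, a dot product under this semiring is simply the parity of the positions k at which aik <= bkj:

    #include <stdbool.h>
    #include <stdint.h>

    // LXOR-reduction of the LE multiplier over two dense vectors of length n:
    // returns the parity of { k : a [k] <= b [k] }
    static bool lxor_le_dot (const int64_t *a, const int64_t *b, int64_t n)
    {
        bool cij = false ;                  // identity of LXOR is false
        for (int64_t k = 0 ; k < n ; k++)
        {
            bool t = (a [k] <= b [k]) ;     // multiply: t = (aik <= bkj)
            cij = (cij != t) ;              // add: cij = cij LXOR t
        }
        return cij ;
    }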
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int8.c new file mode 100644 index 0000000000..18af7165ca --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_int8 +// A'*B function: GB_AdotB__lxor_le_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
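The WITH_ZOMBIES variant of these kernels stores C with the Mask's pattern and marks Mask entries that A*B did not produce as zombies by flipping their row indices. A minimal sketch of the encoding this relies on, assuming the usual self-inverse definition; the exact macros live in GB.h, and the ones below are illustrative stand-ins only:

    #include <stdint.h>

    // hypothetical stand-ins for the real macros in GB.h
    #define FLIP(i)      (-(i)-2)            // 0,1,2,... -> -2,-3,-4,...
    #define IS_ZOMBIE(i) ((i) < 0)
    #define UNFLIP(i)    (IS_ZOMBIE (i) ? FLIP (i) : (i))

    // FLIP is its own inverse, so FLIP (FLIP (i)) == i.  A later wait phase
    // can recover each zombie's row index with UNFLIP and delete the entry;
    // until then, C->nzombies counts how many such entries are pending.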
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint16.c new file mode 100644 index 0000000000..5412975180 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_uint16 +// A'*B function: GB_AdotB__lxor_le_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
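In the masked outer-product loop of these kernels, the Flag workspace encodes three states per row i of column j: 0 (Mask(i,j) is zero, so C(i,j) is never computed), 1 (Mask(i,j) is one but C(i,j) has not yet been seen), and -1 (C(i,j) has been accumulated into w). A simplified sketch of the scatter step, assuming a boolean-valued mask; the real scatter_mask helper in GB_AxB_methods.h also handles arbitrary mask types via cast_Mask and sets the per-column marked flag:

    #include <stdbool.h>
    #include <stdint.h>

    // scatter column j of a boolean mask into the Flag workspace
    static void scatter_mask_sketch (int64_t j,
        const int64_t *Maskp, const int64_t *Maski, const bool *Maskx,
        int8_t *Flag)
    {
        for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++)
        {
            if (Maskx [p]) Flag [Maski [p]] = 1 ;   // Mask(i,j) == 1
        }
        // rows with Mask(i,j) == 0 keep Flag [i] == 0 and are skipped
    }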
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint32.c new file mode 100644 index 0000000000..fbcffecae7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_uint32 +// A'*B function: GB_AdotB__lxor_le_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
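The A'*B function named above walks A(:,i) and B(:,j) with two pointers and applies its MERGE step only where the row indices match. A standalone sketch of that merge over two sorted index/value lists (uint32_t values, as in this file; the function name dot_merge is illustrative and not part of the library):

    #include <stdbool.h>
    #include <stdint.h>

    // cij = LXOR over matching rows k of (A(k,i) <= B(k,j))
    static bool dot_merge (const int64_t *Ai, const uint32_t *Ax, int64_t anz,
                           const int64_t *Bi, const uint32_t *Bx, int64_t bnz,
                           bool *cij_exists)
    {
        bool cij = false ;
        (*cij_exists) = false ;
        int64_t pa = 0, pb = 0 ;
        while (pa < anz && pb < bnz)
        {
            if      (Ai [pa] < Bi [pb]) pa++ ;          // A entry has no match
            else if (Bi [pb] < Ai [pa]) pb++ ;          // B entry has no match
            else
            {
                bool t = (Ax [pa++] <= Bx [pb++]) ;     // multiply
                cij = (*cij_exists) ? (cij != t) : t ;  // add (LXOR)
                (*cij_exists) = true ;
            }
        }
        return cij ;
    }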
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint64.c new file mode 100644 index 0000000000..328d29fb54 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_uint64 +// A'*B function: GB_AdotB__lxor_le_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
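+    // w is a dense gather/scatter accumulator, one entry per row of C.  It
+    // is never cleared in full: the masked path overwrites w [i] the first
+    // time row i is touched and resets only the Flag entries it used, and
+    // the unmasked path clears only the positions listed in the precomputed
+    // pattern Cp,Ci before accumulating into them, so the work per column is
+    // proportional to the number of entries, not to the number of rows.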
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
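+        // A zombie is an entry kept in C's pattern even though A*B produced
+        // nothing for it; its row index is stored as FLIP (i), which lets
+        // this kernel reuse Maskp directly as Cp without compacting each
+        // column.  GB_queue_insert records C in the global queue of matrices
+        // with pending work, so the zombies can be deleted later when the
+        // matrix is completed (see GB_wait).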
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint8.c new file mode 100644 index 0000000000..99cecca4be --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_le_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_le_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_le_uint8 +// A'*B function: GB_AdotB__lxor_le_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
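+    // In the masked path, Flag is a per-row 3-state marker: 0 means
+    // Mask(i,j) is not present (or false) so row i is skipped; +1 means
+    // Mask(i,j) is true but no product has reached row i yet, so the first
+    // product initializes w [i]; -1 means w [i] already holds a partial sum
+    // and later products are folded in with the monoid.  The gather step
+    // resets Flag to 0 so the workspace can be reused for the next column.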
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik <= bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki <= bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lor_bool.c new file mode 100644 index 0000000000..191a32f67a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lor_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lor_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lor_bool +// A'*B function: GB_AdotB__lxor_lor_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) || (bkj != 0) ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) || (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_bool.c new file mode 100644 index 0000000000..f1145e83e4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_bool +// A'*B function: GB_AdotB__lxor_lt_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif 
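+
+// A minimal sketch (not generated by the axb*.m scripts) of the semiring the
+// kernels above use: multiply is (a < b) and add is logical XOR, so each
+// C(i,j) is the XOR of its comparison results.  The helper name and its dense
+// inputs are hypothetical, for illustration only; bool and int64_t come from
+// GB.h, included above.  The generated kernels apply the same two lines to
+// the sparse columns A(:,i) and B(:,j).
+
+static inline bool lxor_lt_bool_dot_sketch  // XOR over k of (x [k] < y [k])
+(
+    const bool *x,      // dense vector of length n, stands in for A(:,i)
+    const bool *y,      // dense vector of length n, stands in for B(:,j)
+    int64_t n
+)
+{
+    bool cij = false ;                      // false is the identity of LXOR
+    for (int64_t k = 0 ; k < n ; k++)
+    {
+        bool t = (x [k] < y [k]) ;          // multiply: t = x(k) < y(k)
+        cij = (cij != t) ;                  // add: cij = cij XOR t
+    }
+    return cij ;
+}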
diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp32.c new file mode 100644 index 0000000000..373b93ce9f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_fp32 +// A'*B function: GB_AdotB__lxor_lt_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
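The LXOR_LT semiring used by the fp32 kernels above can be exercised on its own. The following standalone sketch (illustrative only, with made-up array values; it is not taken from the library) accumulates a single entry the same way the cij loops do: the "multiply" is the < comparison and the "add" is boolean exclusive-or, whose identity is false.

#include <stdbool.h>
#include <stdio.h>

int main (void)
{
    // A(:,i) and B(:,j) as two dense columns of length 4 (made-up values)
    float a [4] = { 1.0f, 4.0f, 2.0f, 0.5f } ;
    float b [4] = { 3.0f, 1.0f, 5.0f, 0.5f } ;

    bool cij = false ;                      // identity of LXOR
    for (int k = 0 ; k < 4 ; k++)
    {
        bool t = a [k] < b [k] ;            // multiply: t = (aki < bkj)
        cij = (cij != t) ;                  // add: cij = (cij != t), i.e. XOR
    }
    printf ("cij = %d\n", cij) ;            // prints 0: exactly two comparisons are true
    return 0 ;
}

The result is the parity of the number of positions k with a [k] < b [k], which is why the semiring comment notes that accumulating the identity false never changes cij.

The sparse merge loops above also skip ahead with GB_BINARY_TRIM_SEARCH when one column is much denser than the other. A minimal sketch of that idea (a plain lower-bound binary search; the helper name and test values are made up, and the real macro may differ in detail):

#include <stdint.h>
#include <stdio.h>

// advance p to the first position in the sorted index list I [p..pend-1]
// whose value is >= target, skipping all smaller entries at once
static int64_t advance_past (const int64_t *I, int64_t p, int64_t pend,
    int64_t target)
{
    int64_t lo = p, hi = pend ;
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (I [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return lo ;
}

int main (void)
{
    int64_t Ai [6] = { 0, 2, 3, 7, 9, 12 } ;    // made-up row indices
    int64_t p = advance_past (Ai, 0, 6, 8) ;
    printf ("skip to position %lld, row %lld\n",
        (long long) p, (long long) Ai [p]) ;    // position 4, row 9
    return 0 ;
}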
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp64.c new file mode 100644 index 0000000000..fa89b0d056 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_fp64 +// A'*B function: GB_AdotB__lxor_lt_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
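+ // The Flag workspace (int8_t, from GB_thread_local.Flag) is used below as a
+ // per-row state for the current column j of the Mask: Flag [i] == 0 means
+ // Mask (i,j) is not present (or not true), Flag [i] > 0 means Mask (i,j) is
+ // present but C (i,j) has not yet received a contribution, and Flag [i] == -1
+ // means C (i,j) has been computed and its current value is held in w [i].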
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp 
[n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int16.c new file mode 100644 index 0000000000..754e3daaf8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_int16 +// A'*B function: GB_AdotB__lxor_lt_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
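+ // When WITH_ZOMBIES is defined, C is built directly in the pattern of the
+ // Mask: Maskp is copied into C->p, and any Mask entry whose C (i,j) receives
+ // no contribution from A*B is kept as a zombie, with its row index stored as
+ // FLIP (i) and counted in C->nzombies; GB_queue_insert then places C in the
+ // queue so the zombies can be deleted later. Without WITH_ZOMBIES, only the
+ // live entries are kept and cnz compacts the pattern as it is gathered.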
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int32.c new file mode 100644 index 0000000000..d5ed77875b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_int32 +// A'*B function: GB_AdotB__lxor_lt_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
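+ // The outer-product method below computes C one column at a time: for each
+ // column j, the contributions A (:,k) * B (k,j) for each entry B (k,j) are
+ // accumulated into the dense workspace w (restricted to the Mask pattern
+ // when a Mask is present), and the finished column is then gathered from w
+ // into C (:,j).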
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int64.c new file mode 100644 index 0000000000..5701761a95 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_int64 +// A'*B function: GB_AdotB__lxor_lt_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
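+ // When no Mask is passed, the pattern of C has already been computed by
+ // GB_AxB_symbolic, so the unmasked branch below only clears w over the
+ // pattern of C (:,j), accumulates the numerical values, and gathers them
+ // back; with a Mask, both the pattern and the values of C (:,j) are
+ // constructed here.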
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int8.c new file mode 100644 index 0000000000..46196131dc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_int8 +// A'*B function: GB_AdotB__lxor_lt_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
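+ // In the masked phase below, A (:,k) is skipped entirely when it is empty or
+ // when its row range [alo,ahi] does not overlap the row range [mlo,mhi] of
+ // Mask (:,j), since none of its entries could survive the mask for this
+ // column.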
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp 
[n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint16.c new file mode 100644 index 0000000000..5dd8564172 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_uint16 +// A'*B function: GB_AdotB__lxor_lt_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
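In the masked branch shown above, Mask(:,j) is first scattered into the Flag workspace, and the per-row flag then encodes three states: 0 means C(i,j) is not permitted by the mask and is skipped, a positive value means the mask allows it but no contribution has arrived yet, and -1 means w [i] already holds a partial result to be combined with LXOR. A minimal dense-vector sketch of that discipline, with hypothetical names (w, Flag, a, bkj, nrows) and the sparse pattern traversal omitted:

    // accumulate one column update w += (a < bkj) under the mask encoded in
    // Flag: 0 = not in mask, positive = in mask / unseen, -1 = in mask / seen
    static void masked_saxpy_sketch (bool *w, int8_t *Flag, int64_t nrows,
        const int8_t *a, int8_t bkj)
    {
        for (int64_t i = 0 ; i < nrows ; i++)
        {
            if (Flag [i] == 0) continue ;       // Mask(i,j) not present
            bool t = (a [i] < bkj) ;            // multiply: LT
            if (Flag [i] > 0)
            {
                Flag [i] = -1 ;                 // first contribution to C(i,j)
                w [i] = t ;
            }
            else
            {
                w [i] = (w [i] != t) ;          // accumulate with LXOR
            }
        }
    }

The generated kernel does the same work but visits only the stored entries of A(:,k), so the cost per column of B scales with the matching nonzeros rather than with nrows.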
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint32.c new file mode 100644 index 0000000000..17fe9d30a3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_uint32 +// A'*B function: GB_AdotB__lxor_lt_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
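The GB_AdotB kernels above pick one of five strategies for each pair A(:,i), B(:,j): both columns dense, A(:,i) dense, B(:,j) dense, one operand more than 32 times sparser than the other (advance through the denser column with GB_BINARY_TRIM_SEARCH to skip runs of unmatched indices), or comparable sparsity (a plain two-pointer merge via the MERGE macro). A compact sketch of the merge case, shown for int8_t with hypothetical names for the index and value arrays (the other XY types differ only in operand type):

    // two-pointer merge over sorted row indices ai[0..anz-1] and bi[0..bnz-1];
    // only matching indices contribute t = (ax < bx), combined with LXOR,
    // as in the "about the same sparsity" branch above
    static bool lxor_lt_merge_sketch (const int64_t *ai, const int8_t *ax,
        int64_t anz, const int64_t *bi, const int8_t *bx, int64_t bnz,
        bool *cij_exists)
    {
        bool cij = false ;
        (*cij_exists) = false ;
        int64_t pa = 0, pb = 0 ;
        while (pa < anz && pb < bnz)
        {
            if      (ai [pa] < bi [pb]) pa++ ;      // unmatched entry of A(:,i)
            else if (bi [pb] < ai [pa]) pb++ ;      // unmatched entry of B(:,j)
            else
            {
                bool t = (ax [pa++] < bx [pb++]) ;  // multiply: LT
                cij = (*cij_exists) ? (cij != t) : t ;  // add: LXOR
                (*cij_exists) = true ;
            }
        }
        return (cij) ;
    }

C(i,j) is appended to the result only when cij_exists is true, which is why an all-false dot product can still create an explicit entry: false is a stored value in this semiring, not the absence of one.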
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint64.c new file mode 100644 index 0000000000..eb196d810e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_uint64 +// A'*B function: GB_AdotB__lxor_lt_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
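Under WITH_ZOMBIES, the masked kernels above never shrink the pattern while computing: C->p is copied from Maskp up front, and any mask position that receives no contribution from A*B is stored with a flipped row index and counted in C->nzombies; GB_queue_insert then records C so those zombies can be deleted later, outside this kernel. A small sketch of the gather step, with FLIP_SKETCH standing in for the library's FLIP macro (its exact encoding is an assumption here) and the clearing of Flag omitted:

    #define FLIP_SKETCH(i) (-(i)-2)        // assumed encoding, for illustration

    // gather C(:,j) from Mask(:,j): live entries come from w, every other
    // mask position becomes a zombie (flipped row index, value false)
    static int64_t gather_zombies_sketch (const int64_t *maski, int64_t mjnz,
        const bool *w, const int8_t *Flag, bool *cx, int64_t *ci)
    {
        int64_t nzombies = 0 ;
        for (int64_t p = 0 ; p < mjnz ; p++)
        {
            int64_t i = maski [p] ;
            if (Flag [i] < 0)
            {
                cx [p] = w [i] ;           // computed by the saxpy pass
                ci [p] = i ;
            }
            else
            {
                cx [p] = false ;           // in the Mask but not in A*B
                ci [p] = FLIP_SKETCH (i) ;
                nzombies++ ;
            }
        }
        return (nzombies) ;
    }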
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the 
end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint8.c new file mode 100644 index 0000000000..2e4347a5df --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lt_uint8 +// A'*B function: GB_AdotB__lxor_lt_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik < bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki < bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_lxor_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_lxor_bool.c new file mode 100644 index 0000000000..8aac7cb990 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_lxor_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_lxor_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this file has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_lxor_bool +// A'*B function: GB_AdotB__lxor_lxor_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C<Mask>=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
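+    // [Editorial note] A sketch of the saxpy-style update performed below,
+    // using the names introduced just after this comment: w is a dense
+    // accumulator for one column C(:,j) at a time.  For each entry B(k,j),
+    // every entry A(i,k) contributes a term t = ((A(i,k) != 0) != (B(k,j) != 0))
+    // to row i, and the LXOR monoid folds it in with w [i] = (w [i] != t).
+    // For example, if row i receives the terms t = true and then t = false
+    // from two values of k, then starting from the identity false,
+    // C(i,j) = ((false != true) != false) = true.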
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = (aik != 0) != (bkj != 0) ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = (aki != 0) != (bkj != 0) ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp32.c new file mode 100644 index 0000000000..0d2ad19f17 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this file has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_fp32 +// A'*B function: GB_AdotB__lxor_ne_fp32 +// Z type : bool (the type of C) +// XY type: float (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C<Mask>=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
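+    // [Editorial note] A small worked example of the semiring listed above:
+    // a term t = (aik != bkj) arises only where A(i,k) and B(k,j) are both
+    // stored entries, and the LXOR monoid accumulates the parity of those
+    // terms.  If row i of A and column j of B share three entries with value
+    // pairs (1.0, 1.0), (2.0, 5.0), and (3.0, 3.0), only the middle pair
+    // differs, so C(i,j) = true; with two differing pairs the toggles cancel
+    // and C(i,j) = false.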
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, 
&pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + 
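+// [Editorial note] For context, callers do not invoke these generated kernels
+// directly; GrB_mxm dispatches to them (through GB_AxB_builtin) when it is
+// given the matching built-in semiring.  A minimal usage sketch, assuming the
+// semiring is exposed under the usual GxB_<add>_<mult>_<type> naming as
+// GxB_LXOR_NE_FP32, and with all error checking omitted:
+//
+//      GrB_Matrix C = NULL ;
+//      GrB_Matrix_new (&C, GrB_BOOL, m, n) ;   // C is m-by-n, boolean
+//      GrB_mxm (C, NULL, NULL, GxB_LXOR_NE_FP32, A, B, NULL) ;
+//      // now C(i,j) is true iff an odd number of the entries shared by
+//      // A(i,:) and B(:,j) differ in value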
+#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp64.c new file mode 100644 index 0000000000..750fa42eff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this file has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_fp64 +// A'*B function: GB_AdotB__lxor_ne_fp64 +// Z type : bool (the type of C) +// XY type: double (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C<Mask>=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
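+    // [Editorial note] On the role of zombies in the masked branch below:
+    // when a Mask is supplied (and WITH_ZOMBIES is defined), the Mask pattern
+    // is copied into C and only the entries the Mask permits are computed.
+    // A Mask position that receives no term from A*B still occupies its slot
+    // in C; its row index is stored as FLIP (i), a reversible encoding that
+    // marks the entry as a zombie, and C->nzombies is incremented.  The
+    // matrix is then placed on the queue (GB_queue_insert) so the zombies can
+    // be pruned later, instead of compacting C inside this kernel.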
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int16.c new file mode 100644 index 0000000000..c2f01ae19a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this file has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_int16 +// A'*B function: GB_AdotB__lxor_ne_int16 +// Z type : bool (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C<Mask>=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
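+    // [Editorial note] The companion dot-product kernel in this file,
+    // GB_AdotB__lxor_ne_int16, chooses a strategy per C(i,j) from the entry
+    // counts ainz = nnz (A(:,i)) and bjnz = nnz (B(:,j)): two dense vectors
+    // use a direct loop over all nrows positions; a dense/sparse pair indexes
+    // the dense vector by the sparse one's row indices; and two sparse
+    // vectors are merged.  When one vector has more than 32 times the entries
+    // of the other, the merge advances through the longer index list with
+    // GB_BINARY_TRIM_SEARCH rather than scanning it linearly; with
+    // ainz = 10000 and bjnz = 3, for instance, a few binary searches replace
+    // a scan of all 10000 entries of A(:,i).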
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int32.c new file mode 100644 index 0000000000..08caa73f68 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this file has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_int32 +// A'*B function: GB_AdotB__lxor_ne_int32 +// Z type : bool (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C<Mask>=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
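+    // [Editorial note] In the dot-product kernel below, the MERGE macro fires
+    // only when A(:,i) and B(:,j) both have an entry at the same position k
+    // (ia == ib).  The first match creates C(i,j) (cij_exists becomes true);
+    // each later match folds in another term with cij = (cij != t).  So,
+    // subject to any Mask, C(i,j) is present exactly when the two patterns
+    // intersect, and it is true when an odd number of intersecting positions
+    // hold unequal values.  For instance, A(:,i) = {k0:1, k3:7} and
+    // B(:,j) = {k0:1, k3:9} intersect at k0 (equal, t = false) and at k3
+    // (unequal, t = true), giving C(i,j) = true.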
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int64.c new file mode 100644 index 0000000000..77f47a5d5c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_int64 +// A'*B function: GB_AdotB__lxor_ne_int64 +// Z type : bool (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
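+ // [Editorial note, not generated text] A worked example of the semiring
+ // described above: the "multiply" records whether a pair of entries
+ // differs, and the "add" (logical xor) keeps the parity of those results.
+ // If A(:,i) has entries {A(0,i)=4, A(2,i)=7} and B(:,j) has entries
+ // {B(0,j)=4, B(1,j)=9, B(2,j)=5}, only rows 0 and 2 pair up:
+ //     k=0: t = (4 != 4) = false ;   k=2: t = (7 != 5) = true
+ // so C(i,j) = false xor true = true: an odd number of the paired entries
+ // differ.  Rows present in only one of the two columns contribute nothing.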
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int8.c new file mode 100644 index 0000000000..948a11cf35 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_int8 +// A'*B function: GB_AdotB__lxor_ne_int8 +// Z type : bool (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
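+ // [Editorial note, not generated text] Sketch of the outer-product pass
+ // below, in the unmasked case: the pattern of C comes from
+ // GB_AxB_symbolic, so for each column j the kernel clears w over the row
+ // indices of C(:,j), then for every entry B(k,j) walks A(:,k) and toggles
+ // w [i] whenever A(i,k) != B(k,j); finally w is gathered back into Cx
+ // following Ci.  Each w [i] therefore ends up holding the parity of
+ // mismatched pairs contributing to C(i,j).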
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint16.c new file mode 100644 index 0000000000..b273cc0b45 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_uint16 +// A'*B function: GB_AdotB__lxor_ne_uint16 +// Z type : bool (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
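+ // [Editorial note, not generated text] In the masked branch below,
+ // Mask(:,j) is scattered into the Flag workspace (scatter_mask,
+ // presumably marking permitted rows with a positive value); rows with
+ // Flag [i] == 0 are skipped, the first contribution to C(i,j) stores t
+ // and sets Flag [i] = -1, and later contributions toggle w [i].  When
+ // compiled with WITH_ZOMBIES, C adopts the Mask pattern up front and
+ // masked positions that receive no contribution become zombies
+ // (Ci [p] = FLIP (i), C->nzombies++); otherwise only live entries are
+ // appended and counted by cnz.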
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
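+ // [Editorial summary, not generated text] The branches above choose a
+ // dot-product kernel per (i,j) pair by sparsity: if both A(:,i) and
+ // B(:,j) are dense they are indexed directly by k; if only one is dense,
+ // the sparse column is scanned and the dense one indexed by pa+k or
+ // pb+k; if one column has more than 32 times the entries of the other,
+ // GB_BINARY_TRIM_SEARCH skips runs of the longer column; otherwise a
+ // linear two-pointer merge is used, with MERGE folding each matched
+ // pair into cij.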
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint32.c new file mode 100644 index 0000000000..2ff4a57930 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_uint32 +// A'*B function: GB_AdotB__lxor_ne_uint32 +// Z type : bool (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
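+ // [Editorial note, not generated text] For this kernel the Z type is
+ // bool, so each workspace entry is a single flag (zsize here should be
+ // sizeof (bool)): w [i] holds the running logical-xor accumulation,
+ // i.e. the mismatch parity, for row i of the column being computed.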
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
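+ // [Editorial trace, not generated text] The MERGE macro consumes one
+ // matched pair per call: the first match sets cij_exists and
+ // cij = (aki != bkj); every later match folds in with cij = (cij != t).
+ // For example, matched pairs (3,3), (8,2), (5,5) give t = false, true,
+ // false, so cij becomes false, then true, then true, and C(i,j) = true.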
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint64.c new file mode 100644 index 0000000000..21e79a104a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_uint64 +// A'*B function: GB_AdotB__lxor_ne_uint64 +// Z type : bool (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
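+ // [Editorial note, not generated text] The flip argument is accepted but
+ // never referenced in the code shown for this semiring: the NE
+ // multiplier is symmetric, (a != b) == (b != a), so swapping A and B
+ // leaves t unchanged.  Kernels generated for non-commutative multipliers
+ // presumably consult flip to reverse the operand order.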
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + 
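+    // At this point every column of C has been computed.  C is held in
+    // compressed-sparse-column form: the row indices of column j are
+    // Ci [Cp [j] ... Cp [j+1]-1], with the values in the same positions of
+    // Cx, and Cp [n] (logged just below) records the total entry count.
+    // For example, a 2-by-2 result holding only C(0,0) and C(1,1) is stored
+    // as Cp = {0,1,2}, Ci = {0,1}, Cx = {c00, c11}.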
// log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint8.c new file mode 100644 index 0000000000..21b94da7f0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_ne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_ne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_ne_uint8 +// A'*B function: GB_AdotB__lxor_ne_uint8 +// Z type : bool (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
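+    // w is the gather/scatter workspace, one entry per row of C, taken from
+    // thread-local storage.  It is left uninitialized here because each
+    // column initializes the entries it touches before reading them.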
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + bool t = aik != bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = aki != bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__lxor_second_bool.c b/GraphBLAS/Source/Generated/GB_AxB__lxor_second_bool.c new file mode 100644 index 0000000000..a36ec88ff0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__lxor_second_bool.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__lxor_second_bool: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__lxor_second_bool +// A'*B function: GB_AdotB__lxor_second_bool +// Z type : bool (the type of C) +// XY type: bool (the type of A and B) +// Identity: false (where cij = (cij != false) does not change cij) +// Multiply: t = (bkj) +// Add: cij = (cij != t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__lxor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
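+    // the multiply operator is SECOND(x,y) = y, so each product t depends
+    // only on bkj; aik is loaded below but does not affect the result.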
+ bool *restrict w = GB_thread_local.Work ; + + bool *restrict Cx = C->x ; + const bool *restrict Ax = A->x ; + const bool *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + bool aik = Ax [pa] ; + bool t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = (w [i] != t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = false ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = false ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + bool bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + bool aik = Ax [pa] ; + bool t = bkj ; + w [i] = (w [i] != t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__lxor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + bool aki = Ax [pa++] ; /* aki = A(k,i) */ \ + bool bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + bool t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = (cij != t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + bool *Cx = C->x ; + const bool *Ax = A->x ; + const bool *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + bool cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; 
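+            // jinit and cij_init have located the pair of columns to dot:
+            // A(:,i) is Ai/Ax [pa ... pa_end-1] with ainz entries, and
+            // B(:,j) is Bi/Bx [pb ... pb_end-1] with bjnz entries.  The
+            // cases below pick a strategy from their relative sparsity; for
+            // instance, when A(:,i) has thousands of entries and B(:,j) only
+            // a handful (ainz > 32 * bjnz), a trimmed binary search through
+            // Ai is cheaper than a full linear merge.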
+ + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for (int64_t k = 0 ; k < nrows ; k++) + { + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij != t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + bool aki = Ax [pa + k] ; // aki = A(k,i) + bool bkj = Bx [pb] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij != t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = false ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + bool aki = Ax [pa] ; // aki = A(k,i) + bool bkj = Bx [pb + k] ; // bkj = B(k,j) + bool t = bkj ; + cij = (cij != t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__max_div_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_fp32.c new file mode 100644 index 0000000000..f93cdacabe --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_div_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_fp32 +// A'*B function: GB_AdotB__max_div_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
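+    // the additive monoid is MAX with identity -INFINITY: entries of w are
+    // set to -INFINITY (or to the first product seen) before being updated
+    // with FMAX below.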
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_fp64.c new file mode 100644 index 0000000000..43404d2998 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_div_fp64: hard-coded C=A*B 
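+// Division is not commutative, so the flip flag selects bkj/aik when the
+// caller has swapped A and B.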
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_fp64 +// A'*B function: GB_AdotB__max_div_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into 
Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_int16.c new file mode 100644 index 0000000000..378c412050 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_div_int16: hard-coded C=A*B 
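+// Integer counterpart of the floating-point variants above: IMAX and IDIV
+// take the place of FMAX and floating-point division, with identity
+// INT16_MIN.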
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_int16 +// A'*B function: GB_AdotB__max_div_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_int32.c new file mode 100644 index 0000000000..7fbb1edd51 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__max_div_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_int32 +// A'*B function: GB_AdotB__max_div_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < 
mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_int64.c new file mode 100644 index 0000000000..5c1bee5122 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__max_div_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_int64 +// A'*B function: GB_AdotB__max_div_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < 
mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_int8.c new file mode 100644 index 0000000000..ffcc3a9fd9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__max_div_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_int8 +// A'*B function: GB_AdotB__max_div_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > 
mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint16.c new file mode 100644 index 0000000000..fa362157b1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__max_div_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_uint16 +// A'*B function: GB_AdotB__max_div_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || 
alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint32.c new file mode 100644 index 0000000000..369ac23555 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__max_div_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_uint32 +// A'*B function: GB_AdotB__max_div_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || 
alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint64.c new file mode 100644 index 0000000000..ab6e41f545 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__max_div_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_uint64 +// A'*B function: GB_AdotB__max_div_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || 
alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_div_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint8.c new file mode 100644 index 0000000000..29f344e53a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_div_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_div_uint8: 
hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_div_uint8 +// A'*B function: GB_AdotB__max_div_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_fp32.c new file mode 100644 index 0000000000..d6040796e7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_fp32: 
hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_fp32 +// A'*B function: GB_AdotB__max_first_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if 
not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; 
+ const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, 
pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_fp64.c new file mode 100644 index 0000000000..2a90b0ae54 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. 
+ +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_fp64 +// A'*B function: GB_AdotB__max_first_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // 
C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; 
/* bjk = B(k,j) */ \ + double t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + 
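+            // Descriptive note on this branch: each entry of the much
+            // sparser A(:,i) is handled one at a time, while
+            // GB_BINARY_TRIM_SEARCH skips past the intervening run of
+            // B(ib:ia-1,j) in O(log(bjnz)) time, so this case takes roughly
+            // O(ainz*log(bjnz)) work instead of O(ainz+bjnz).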
while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_int16.c new file mode 100644 index 0000000000..2350633e41 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_int16 +// A'*B function: GB_AdotB__max_first_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
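+    // w is a dense workspace for one column of C at a time: in the masked
+    // phase below its entries are valid only where Flag [i] is nonzero, and
+    // in the unmasked phase it is first cleared to the identity INT16_MIN
+    // over the pattern of C(:,j), then accumulated with IMAX and gathered
+    // into Cx.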
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
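+            // if cij_exists is still false, the patterns of A(:,i) and
+            // B(:,j) did not intersect, and no entry C(i,j) is created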
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_int32.c new file mode 100644 index 0000000000..cf8d6879ec --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_int32 +// A'*B function: GB_AdotB__max_first_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_int64.c new file mode 100644 index 0000000000..43ead2d61c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_int64 +// A'*B function: GB_AdotB__max_first_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_int8.c new file mode 100644 index 0000000000..c3129fe772 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_int8 +// A'*B function: GB_AdotB__max_first_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint16.c new file mode 100644 index 0000000000..8b3dab4724 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_uint16 +// A'*B function: GB_AdotB__max_first_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
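    // Note (illustrative sketch, not part of the generated kernel): a routine
    // like this one is normally reached through GrB_mxm rather than called
    // directly.  GrB_mxm, GrB_Matrix_new, and GrB_UINT16 below are the
    // standard GraphBLAS C API; the built-in semiring name
    // GxB_MAX_FIRST_UINT16 is an assumption:
    //
    //      GrB_Matrix C = NULL ;
    //      GrB_Matrix_new (&C, GrB_UINT16, m, n) ;
    //      // C<Mask> = A*B over the (MAX, FIRST) semiring on uint16
    //      GrB_mxm (C, Mask, NULL, GxB_MAX_FIRST_UINT16, A, B, NULL) ;
    //
    // When GBCOMPACT is not defined, hard-coded kernels such as this one can
    // be selected in place of the generic, switch-based kernel.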
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint32.c new file mode 100644 index 0000000000..3fb2be62f8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_uint32 +// A'*B function: GB_AdotB__max_first_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
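    // Note (descriptive, not part of the generated kernel): both branches of
    // this routine follow the same Gustavson-style outer-product pattern.
    // The workspace w declared below is a dense accumulator of size
    // C->nrows; for each column j, every entry B(k,j) scatters column A(:,k)
    // into w under the (MAX, FIRST) semiring, and C(:,j) is then gathered
    // from w.  Stripped of the mask and zombie handling, the computation is:
    //
    //      for (int64_t j = 0 ; j < n ; j++)
    //      {
    //          // clear w on the pattern of C(:,j), then accumulate:
    //          for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    //          {
    //              int64_t k = Bi [p] ;
    //              for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
    //              {
    //                  // FIRST multiply: t = A(i,k) ; MAX monoid add:
    //                  w [Ai [pa]] = IMAX (w [Ai [pa]], Ax [pa]) ;
    //              }
    //          }
    //          // gather C(:,j) from w using the precomputed pattern Cp, Ci
    //      }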
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint64.c new file mode 100644 index 0000000000..456b8331a4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_uint64 +// A'*B function: GB_AdotB__max_first_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
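    // Note (descriptive, not part of the generated kernel): when a Mask is
    // present and WITH_ZOMBIES is defined, C takes the Mask's pattern
    // verbatim (C->p is copied from Maskp).  Mask entries for which no
    // C(i,j) is produced are kept as "zombies": the row index is stored as
    // FLIP (i), C->nzombies is incremented, and GB_queue_insert (C) places
    // the matrix in the queue of matrices with pending work so the zombies
    // can be pruned later.  The Flag workspace encodes three states per row:
    //
    //      Flag [i] == 0   Mask(i,j) false or not present (entry skipped)
    //      Flag [i]  > 0   Mask(i,j) true, C(i,j) not yet seen
    //      Flag [i] == -1  C(i,j) has been seen; w [i] holds its value
    //
    // Flag is reset to zero during the gather, so it is clean for column j+1.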
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_first_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint8.c new file mode 100644 index 0000000000..9cb638dd54 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_first_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_first_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_first_uint8 +// A'*B function: GB_AdotB__max_first_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
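    // Note (descriptive, not part of the generated kernel): the companion dot
    // product routine, GB_AdotB__max_first_uint8 below, computes each C(i,j)
    // as a reduction over the intersection of the patterns of A(:,i) and
    // B(:,j).  It picks one of six cases per (i,j): both columns dense, only
    // A(:,i) dense, only B(:,j) dense, one column more than 32 times sparser
    // than the other (advance through the denser one with
    // GB_BINARY_TRIM_SEARCH), or a plain linear merge of the two sorted
    // index lists:
    //
    //      while (pa < pa_end && pb < pb_end)
    //      {
    //          if      (Ai [pa] < Bi [pb]) pa++ ;
    //          else if (Bi [pb] < Ai [pa]) pb++ ;
    //          else    MERGE ;   // indices match: fold A(k,i) into cij (MAX)
    //      }
    //
    // C(i,j) is appended to the pattern only if the intersection is nonempty
    // (cij_exists).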
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] 
= cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp32.c new file mode 100644 index 0000000000..779cf24808 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_fp32 +// A'*B function: GB_AdotB__max_iseq_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
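    // Note (descriptive, not part of the generated kernel): only the type and
    // operator slots of the template change from one generated file to the
    // next.  Here Z and XY are float, the multiply is ISEQ, so
    // t = (aik == bkj) yields 1 or 0 as a float, the add is FMAX, and the
    // identity is -INFINITY.  For the dot product this means C(i,j) is 1 when
    // some k in the intersection of A(:,i) and B(:,j) has equal values, and 0
    // when the intersection is nonempty but no pair of values matches.  For
    // example:
    //
    //      A(:,i) = { (k=2, 4.0), (k=5, 7.0) }
    //      B(:,j) = { (k=5, 7.0), (k=9, 1.0) }
    //      intersection k = 5:  t = (7.0 == 7.0) = 1,  cij = 1
    //
    // Compare with the MAX_FIRST kernels above, whose FIRST multiply ignores
    // bkj entirely (t = aik).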
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik == bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki == bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp64.c new file mode 100644 index 0000000000..0672dc597a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_fp64 +// A'*B function: GB_AdotB__max_iseq_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
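+    // In effect (an informal sketch of the semiring described above), each
+    // entry of C reduces to
+    //
+    //      C(i,j) = max over k of ( A(i,k) == B(k,j) ? 1 : 0 )
+    //
+    // where k ranges over the indices present in both A(i,:) and B(:,j),
+    // starting from the identity -INFINITY.  A computed entry is therefore 1
+    // if any shared index k has equal values in A and B, and 0 otherwise.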
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik == bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki == bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int16.c new file mode 100644 index 0000000000..b76a39db15 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_int16 +// A'*B function: GB_AdotB__max_iseq_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
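+    // w acts as a dense accumulator indexed by row: in the unmasked case it
+    // is first cleared to the identity INT16_MIN over the pattern of C(:,j),
+    // updated with IMAX as each term of A*B(:,j) is scattered in, and then
+    // gathered back into Cx.  In the masked case w is not pre-cleared; the
+    // Flag workspace records which entries have been written.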
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int32.c new file mode 100644 index 0000000000..dd0a17e88d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_int32 +// A'*B function: GB_AdotB__max_iseq_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
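+    // Two cases follow.  If a Mask is present, its pattern is taken as the
+    // pattern of C; entries of the Mask with no contribution from A*B are
+    // either skipped or kept as zombies (flipped row indices), depending on
+    // whether WITH_ZOMBIES is defined.  Otherwise the pattern of C has
+    // already been computed by GB_AxB_symbolic and only the numerical
+    // values are filled in here.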
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int64.c new file mode 100644 index 0000000000..802e4acd5e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_int64 +// A'*B function: GB_AdotB__max_iseq_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
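+    // Note that flip is accepted for interface uniformity but is not
+    // referenced below: the ISEQ multiplier is symmetric (x == y equals
+    // y == x), so swapping A and B does not change the computed values.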
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int8.c new file mode 100644 index 0000000000..c7d8680e74 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_int8 +// A'*B function: GB_AdotB__max_iseq_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
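For readers skimming these generated kernels: the MAX_ISEQ semiring they implement multiplies with ISEQ (t = (aik == bkj), which yields 0 or 1 in the operand type) and adds with MAX, whose identity for int8_t is INT8_MIN as stated in the header comments above. A minimal standalone sketch of one such dot product over small dense vectors, defining its own IMAX macro (the kernels take theirs from GB.h); all names in this snippet are illustrative only, not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define IMAX(a,b) (((a) > (b)) ? (a) : (b))

    /* cij = max over k of (a [k] == b [k]), starting from the MAX identity */
    static int8_t max_iseq_dot (const int8_t *a, const int8_t *b, int64_t n)
    {
        int8_t cij = INT8_MIN ;             /* identity of the MAX monoid */
        for (int64_t k = 0 ; k < n ; k++)
        {
            int8_t t = (a [k] == b [k]) ;   /* ISEQ multiply: 0 or 1 */
            cij = IMAX (cij, t) ;           /* MAX add */
        }
        return (cij) ;
    }

    int main (void)
    {
        int8_t a [4] = { 1, 2, 3, 4 } ;
        int8_t b [4] = { 0, 2, 5, 7 } ;
        printf ("%d\n", max_iseq_dot (a, b, 4)) ;   /* prints 1: a [1] == b [1] */
        return (0) ;
    }
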
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint16.c new file mode 100644 index 0000000000..17379b5959 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_uint16 +// A'*B function: GB_AdotB__max_iseq_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
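The masked branch that follows (and the identical one in the int8 kernel above) scatters Mask(:,j) into the Flag workspace, accumulates w [i] only where the mask is set, then gathers C(:,j) back along the Mask pattern; in the WITH_ZOMBIES build, masked positions that never receive an entry become zombies with FLIP'd row indices. A minimal sketch of that scatter/accumulate/clear pattern for one column, written against plain arrays rather than the GrB_Matrix internals; the function and parameter names below are illustrative, not the library's API, and the gather into C is only indicated in a comment:

    #include <stdint.h>

    #define IMAX(a,b) (((a) > (b)) ? (a) : (b))

    /* One column C(:,j) = (A * B(:,j)) .* Mask(:,j), accumulated into w.
       mask_i lists the rows where Mask(:,j) is set; B(:,j) is (bj_k, bj_x);
       A is in CSC form (Ap, Ai, Ax).  Flag and w have length nrows.
       Flag: 0 = unmasked, 1 = masked, -1 = masked and already written. */
    void masked_saxpy_column
    (
        uint16_t *w, int8_t *Flag,
        const int64_t *mask_i, int64_t mask_nz,
        const int64_t *bj_k, const uint16_t *bj_x, int64_t bjnz,
        const int64_t *Ap, const int64_t *Ai, const uint16_t *Ax
    )
    {
        for (int64_t p = 0 ; p < mask_nz ; p++) Flag [mask_i [p]] = 1 ;
        for (int64_t pb = 0 ; pb < bjnz ; pb++)
        {
            int64_t k = bj_k [pb] ;
            uint16_t bkj = bj_x [pb] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                int64_t i = Ai [pa] ;
                if (Flag [i] == 0) continue ;        /* Mask(i,j) not set */
                uint16_t t = (Ax [pa] == bkj) ;      /* ISEQ multiply */
                if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = t ; }  /* 1st hit */
                else              { w [i] = IMAX (w [i], t) ; }    /* MAX add */
            }
        }
        /* the kernel would now gather w [i] into C(:,j) for rows with
           Flag [i] == -1 (rows still flagged 1 become zombies in the
           WITH_ZOMBIES build), then clear Flag back to zero, as here */
        for (int64_t p = 0 ; p < mask_nz ; p++) Flag [mask_i [p]] = 0 ;
    }
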
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint32.c new file mode 100644 index 0000000000..8dbc700394 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_uint32 +// A'*B function: GB_AdotB__max_iseq_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
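In the A'*B dot-product routines above, each C(i,j) picks one of five strategies: both columns dense, A(:,i) dense, B(:,j) dense, one column at least 32 times sparser than the other (in which case GB_BINARY_TRIM_SEARCH skips ahead through the denser index list), or a plain two-finger merge. A minimal sketch of the skip-ahead merge over two sorted index lists, using an ordinary lower-bound binary search in place of GB_BINARY_TRIM_SEARCH; the function names are illustrative, not the library's API:

    #include <stdint.h>

    /* first position p in [pleft, pright) with I [p] >= target */
    int64_t lower_bound (const int64_t *I, int64_t pleft, int64_t pright,
        int64_t target)
    {
        while (pleft < pright)
        {
            int64_t pmid = pleft + (pright - pleft) / 2 ;
            if (I [pmid] < target) pleft = pmid + 1 ; else pright = pmid ;
        }
        return (pleft) ;
    }

    /* count row indices common to Ai [pa..pa_end-1] and Bi [pb..pb_end-1],
       skipping through the much denser list Ai by binary search (the case
       ainz > 32 * bjnz above); each hit corresponds to one MERGE step */
    int64_t intersection_count (const int64_t *Ai, int64_t pa, int64_t pa_end,
        const int64_t *Bi, int64_t pb, int64_t pb_end)
    {
        int64_t hits = 0 ;
        while (pa < pa_end && pb < pb_end)
        {
            int64_t ia = Ai [pa], ib = Bi [pb] ;
            if      (ia < ib) pa = lower_bound (Ai, pa + 1, pa_end, ib) ;
            else if (ib < ia) pb++ ;
            else { hits++ ; pa++ ; pb++ ; }
        }
        return (hits) ;
    }
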
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint64.c new file mode 100644 index 0000000000..3731158dcb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_uint64 +// A'*B function: GB_AdotB__max_iseq_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
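The MERGE macro in each dot-product routine folds one matching index pair into cij: the first match sets cij and cij_exists (so that C(i,j) enters the pattern at all), and every later match is accumulated with IMAX. A minimal sketch of the same logic as a standalone function over two sorted sparse vectors, under the MAX_ISEQ semiring for uint64_t; the names are illustrative, not the library's API:

    #include <stdint.h>
    #include <stdbool.h>

    #define IMAX(a,b) (((a) > (b)) ? (a) : (b))

    /* dot product of two sorted sparse vectors (Ai,Ax) and (Bi,Bx) under the
       MAX_ISEQ semiring; returns true and sets *cij only if their patterns
       intersect, mirroring cij_exists / MERGE above */
    bool max_iseq_sparse_dot
    (
        uint64_t *cij,
        const int64_t *Ai, const uint64_t *Ax, int64_t anz,
        const int64_t *Bi, const uint64_t *Bx, int64_t bnz
    )
    {
        bool cij_exists = false ;
        int64_t pa = 0, pb = 0 ;
        while (pa < anz && pb < bnz)
        {
            if      (Ai [pa] < Bi [pb]) pa++ ;
            else if (Bi [pb] < Ai [pa]) pb++ ;
            else
            {
                uint64_t t = (Ax [pa++] == Bx [pb++]) ;   /* ISEQ multiply */
                if (cij_exists) *cij = IMAX (*cij, t) ;   /* MAX add */
                else { cij_exists = true ; *cij = t ; }   /* first entry */
            }
        }
        return (cij_exists) ;
    }
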
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint8.c new file mode 100644 index 0000000000..b3f59a0641 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_iseq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_iseq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_iseq_uint8 +// A'*B function: GB_AdotB__max_iseq_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
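When no Mask is passed, these kernels rely on a pattern already computed by GB_AxB_symbolic: for each column they clear w to the additive identity over C(:,j)'s pattern, run the saxpy-style accumulation, and gather w back into Cx. A minimal sketch of that numeric phase for one column in the uint8_t case (identity 0), against plain CSC arrays; parameter names are illustrative, not the library's API:

    #include <stdint.h>

    #define IMAX(a,b) (((a) > (b)) ? (a) : (b))

    /* numeric phase for one column j, given the precomputed pattern
       Ci [cp .. cp_end-1] (standing in for GB_AxB_symbolic's output);
       B(:,j) is (bj_k, bj_x); A is CSC (Ap, Ai, Ax); w has length nrows */
    void unmasked_numeric_column
    (
        uint8_t *Cx, const int64_t *Ci, int64_t cp, int64_t cp_end,
        const int64_t *Ap, const int64_t *Ai, const uint8_t *Ax,
        const int64_t *bj_k, const uint8_t *bj_x, int64_t bjnz,
        uint8_t *w
    )
    {
        for (int64_t p = cp ; p < cp_end ; p++) w [Ci [p]] = 0 ;  /* identity */
        for (int64_t pb = 0 ; pb < bjnz ; pb++)
        {
            int64_t k = bj_k [pb] ;
            uint8_t bkj = bj_x [pb] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                uint8_t t = (Ax [pa] == bkj) ;            /* ISEQ multiply */
                w [Ai [pa]] = IMAX (w [Ai [pa]], t) ;     /* MAX add */
            }
        }
        for (int64_t p = cp ; p < cp_end ; p++) Cx [p] = w [Ci [p]] ;  /* gather */
    }
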
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // 
log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_fp32.c new file mode 100644 index 0000000000..8ce4358918 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_fp32 +// A'*B function: GB_AdotB__max_isge_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
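    // Editor's note: the kernel below follows the classic Gustavson scheme:
    // for each column j, the entries of B(:,j) select columns of A, partial
    // results accumulate in a dense work vector w, and the result is gathered
    // into C(:,j).  The following is a minimal standalone sketch of that
    // pattern for one column, using the same MAX (FMAX) monoid and ISGE
    // multiply; it is not part of the generated file, and the CSC arrays
    // (Ap, Ai, Ax, Bp, Bi, Bx) are hypothetical inputs, not the library API.

    #include <math.h>
    #include <stdint.h>

    // compute w [i] = max over k of (A(i,k) >= B(k,j)), for one column j
    static void column_max_isge_fp32
    (
        float *w,                   // dense work vector of size nrows,
                                    // initialized to -INFINITY (the identity)
        const int64_t *Ap, const int64_t *Ai, const float *Ax,  // A, CSC form
        const int64_t *Bp, const int64_t *Bi, const float *Bx,  // B, CSC form
        int64_t j                   // column of B (and of C) to compute
    )
    {
        for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
        {
            int64_t k = Bi [p] ;                // B(k,j) is present
            float bkj = Bx [p] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                int64_t i = Ai [pa] ;           // A(i,k) is present
                float t = (Ax [pa] >= bkj) ;    // ISGE multiply: 0.0 or 1.0
                w [i] = fmaxf (w [i], t) ;      // MAX monoid accumulate
            }
        }
    }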
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik >= bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_fp64.c new file mode 100644 index 0000000000..65cfa04dc5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_fp64 +// A'*B function: GB_AdotB__max_isge_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
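    // Editor's note: in the masked branch of these kernels, Mask(:,j) is
    // scattered into the byte array Flag once per column: Flag [i] = 1 marks a
    // position where C(i,j) is permitted, and the first accumulation into
    // w [i] flips it to -1 so later updates know the entry already exists.
    // The sketch below isolates that scatter step; it is an illustration only,
    // the boolean mask arrays are hypothetical, and the real kernel casts the
    // mask values from their stored type to boolean via cast_Mask.

    #include <stdint.h>
    #include <stdbool.h>

    static void scatter_bool_mask
    (
        int8_t *Flag,               // size nrows, assumed all zero on entry
        const int64_t *Maskp, const int64_t *Maski, const bool *Maskx,
        int64_t j                   // column of the mask to scatter
    )
    {
        for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++)
        {
            if (Maskx [p])          // only true mask entries permit C(i,j)
            {
                Flag [Maski [p]] = 1 ;
            }
        }
        // after column j is gathered, the same positions are visited again to
        // reset Flag to zero, so the workspace can be reused for column j+1
    }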
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik >= bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int16.c new file mode 100644 index 0000000000..9a3ae3382c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_int16 +// A'*B function: GB_AdotB__max_isge_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
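    // Editor's note: the "multiply" of this semiring is ISGE, which returns
    // the comparison result as a value of the matrix type (0 or 1), and the
    // "add" is MAX with identity INT16_MIN.  Each C(i,j) therefore ends up 1
    // if any k in the pattern has A(i,k) >= B(k,j), and 0 otherwise.  A small
    // scalar sketch of one accumulation step (illustration only):

    #include <stdint.h>

    static int16_t max_isge_accum (int16_t cij, int16_t aik, int16_t bkj)
    {
        int16_t t = (aik >= bkj) ;      // ISGE multiply: 0 or 1
        return (cij > t) ? cij : t ;    // IMAX-style add
    }
    // starting from the identity INT16_MIN, a single update with t in {0,1}
    // already dominates, which is why IMAX (cij, INT16_MIN) leaves cij alone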
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
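    // Editor's note: a "zombie" is an entry that exists in the Mask pattern
    // (and thus in C's preallocated pattern) but received no value from A*B.
    // Its row index is stored flipped to a negative value so a later cleanup
    // pass (GB_wait) can prune it, and the matrix is queued for that deferred
    // work.  The helpers below are a hypothetical illustration of such a
    // reversible flip; the library's own FLIP macro may use a different
    // encoding.

    #include <stdint.h>
    #include <assert.h>

    static inline int64_t flip_index (int64_t i)   { return (-i) - 2 ; }
    static inline int64_t unflip_index (int64_t i) { return (i < 0) ? ((-i) - 2) : i ; }

    static void flip_demo (void)
    {
        int64_t i = 5 ;
        int64_t z = flip_index (i) ;        // z = -7 marks row 5 as a zombie
        assert (z < 0) ;                    // zombies are recognizable by sign
        assert (unflip_index (z) == i) ;    // and the flip can be undone
    }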
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int32.c new file mode 100644 index 0000000000..a8c19efbc7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_int32 +// A'*B function: GB_AdotB__max_isge_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
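    // Editor's note: INT32_MIN is the identity of the IMAX monoid used here,
    // which is what the header comment means by "cij = IMAX (cij, INT32_MIN)
    // does not change cij"; it is also the value used to initialize the work
    // vector and to fill zombie slots.  A quick standalone check of that
    // identity property (illustration only):

    #include <stdint.h>
    #include <assert.h>

    static int32_t imax32 (int32_t x, int32_t y) { return (x > y) ? x : y ; }

    static void identity_check (void)
    {
        const int32_t id = INT32_MIN ;
        int32_t samples [ ] = { INT32_MIN, -1, 0, 1, INT32_MAX } ;
        for (int k = 0 ; k < 5 ; k++)
        {
            assert (imax32 (samples [k], id) == samples [k]) ;  // id is neutral
            assert (imax32 (id, samples [k]) == samples [k]) ;  // on both sides
        }
    }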
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
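    // Editor's note: the path compiled above is the #else branch of
    // WITH_ZOMBIES, which avoids zombies entirely: it records the start of
    // each column in Cp [j], gathers only the live entries that Flag marked
    // as computed, and advances a single running count cnz.  A minimal sketch
    // of that compacting gather for one column, with hypothetical array
    // arguments (not the generated kernel itself):

    #include <stdint.h>

    static int64_t gather_live_entries
    (
        int32_t *Cx, int64_t *Ci, int64_t cnz,      // output column of C
        const int64_t *Maski, int64_t pstart, int64_t pend, // Mask(:,j) pattern
        int8_t *Flag, const int32_t *w              // per-row state and values
    )
    {
        for (int64_t p = pstart ; p < pend ; p++)
        {
            int64_t i = Maski [p] ;
            if (Flag [i] < 0)           // C(i,j) was actually computed
            {
                Cx [cnz] = w [i] ;
                Ci [cnz++] = i ;
            }
            Flag [i] = 0 ;              // reset the workspace for column j+1
        }
        return cnz ;                    // caller records column starts in Cp
    }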
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int64.c new file mode 100644 index 0000000000..81f920c4b4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_int64 +// A'*B function: GB_AdotB__max_isge_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
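    // Editor's note: in the GB_AdotB (C=A'*B) kernels of these generated
    // files, when one sparse vector has more than 32 times the entries of the
    // other, the merge switches to a trimmed binary search: instead of
    // stepping one entry at a time through the denser vector, it searches the
    // remaining range for the next index of the sparser one.  The helper below
    // is a hypothetical stand-in for that search (GB_BINARY_TRIM_SEARCH itself
    // may differ): it returns the first position p in [lo,hi) with
    // Ix [p] >= target.

    #include <stdint.h>

    static int64_t lower_bound_index
    (
        const int64_t *Ix,          // strictly increasing row indices
        int64_t lo, int64_t hi,     // search the half-open range [lo, hi)
        int64_t target              // row index to align with
    )
    {
        while (lo < hi)
        {
            int64_t mid = lo + (hi - lo) / 2 ;
            if (Ix [mid] < target) lo = mid + 1 ; else hi = mid ;
        }
        return lo ;                 // every entry before lo is < target
    }
    // when ainz is much larger than bjnz this turns the merge from
    // O(ainz + bjnz) pointer steps into roughly O(bjnz log ainz) searches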
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
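    // Editor's note: the GB_AdotB kernels in these files also test
    // ainz == nrows and bjnz == nrows to detect fully dense columns; in that
    // case the row index of the k-th entry is simply k, so Ax [pa + k] and
    // Bx [pb + k] can be addressed directly with no index lookup or merge.
    // A minimal sketch of the all-dense dot product for this semiring, with
    // hypothetical array arguments (illustration only):

    #include <stdint.h>

    static int64_t dense_dot_max_isge_int64
    (
        const int64_t *Ax, int64_t pa,      // A(:,i) starts at Ax [pa], dense
        const int64_t *Bx, int64_t pb,      // B(:,j) starts at Bx [pb], dense
        int64_t nrows
    )
    {
        int64_t cij = INT64_MIN ;           // identity of the IMAX monoid
        for (int64_t k = 0 ; k < nrows ; k++)
        {
            int64_t t = (Ax [pa + k] >= Bx [pb + k]) ;  // ISGE multiply
            cij = (cij > t) ? cij : t ;                 // IMAX add
        }
        return cij ;    // 1 if any A(k,i) >= B(k,j), else 0 (for nrows > 0)
    }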
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int8.c new file mode 100644 index 0000000000..0a22dae348 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_int8 +// A'*B function: GB_AdotB__max_isge_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
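// The GB_AdotB kernels above merge the sorted row indices of A(:,i) and
// B(:,j); when one list is more than 32 times longer than the other, they
// advance through the long list with a binary "trim" search rather than one
// entry at a time.  A standalone sketch of that idea follows, using an
// ordinary lower-bound search in place of GB_BINARY_TRIM_SEARCH; the names,
// sizes, and index values below are illustrative only.

#include <inttypes.h>
#include <stdio.h>

// smallest position p in left..right-1 with list [p] >= target, or right if
// none exists (a plain lower-bound search standing in for the trim search)
static int64_t lower_bound (int64_t target, const int64_t *list,
    int64_t left, int64_t right)
{
    while (left < right)
    {
        int64_t mid = left + (right - left) / 2 ;
        if (list [mid] < target) left = mid + 1 ; else right = mid ;
    }
    return (left) ;
}

int main (void)
{
    // row indices of A(:,i) (long) and B(:,j) (short), both sorted
    int64_t Ai [10] = { 0, 2, 3, 5, 7, 8, 11, 13, 17, 19 } ;
    int64_t Bi [2]  = { 7, 18 } ;
    int64_t pa = 0, pa_end = 10, pb = 0, pb_end = 2 ;
    int64_t matches = 0 ;

    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib)
        {
            // skip the run A(ia:ib-1,i) in one binary search
            pa = lower_bound (ib, Ai, pa + 1, pa_end) ;
        }
        else if (ib < ia)
        {
            pb++ ;
        }
        else
        {
            // ia == ib: a pair A(k,i), B(k,j) to "multiply" and accumulate
            printf ("match at row %" PRId64 "\n", ia) ;
            matches++ ;
            pa++ ; pb++ ;
        }
    }
    printf ("%" PRId64 " matching row indices\n", matches) ;   // prints 1
    return (0) ;
}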
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint16.c new file mode 100644 index 0000000000..e1a8c23040 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_uint16 +// A'*B function: GB_AdotB__max_isge_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
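// When no Mask is passed, the outer-product kernels above rely on a pattern of
// C already computed by GB_AxB_symbolic: for each column j, w is cleared to
// the monoid identity only at the positions that will appear in C(:,j), the
// products are accumulated into w, and C(:,j) is gathered from those same
// positions.  A standalone sketch of that clear/accumulate/gather cycle for
// one column follows; the pattern, values, and (i,t) updates are made up, and
// the identity 0 matches the unsigned variants of this semiring.

#include <inttypes.h>
#include <stdio.h>

#define EXAMPLE_IMAX(x,y) (((x) > (y)) ? (x) : (y))

int main (void)
{
    // precomputed pattern of C(:,j): rows 2 and 5 will be present
    int64_t  Ci [2] = { 2, 5 } ;
    uint16_t Cx [2] ;
    uint16_t w [8] ;                        // dense workspace, one slot per row

    // clear w to the MAX identity (0 for unsigned) at the pattern of C(:,j)
    for (int p = 0 ; p < 2 ; p++) w [Ci [p]] = 0 ;

    // accumulate products t = (aik >= bkj); the symbolic analysis guarantees
    // every row index touched here already appears in the pattern
    int64_t  rows [3] = { 2, 5, 2 } ;
    uint16_t vals [3] = { 0, 1, 1 } ;
    for (int p = 0 ; p < 3 ; p++)
    {
        int64_t i = rows [p] ;
        w [i] = EXAMPLE_IMAX (w [i], vals [p]) ;
    }

    // gather C(:,j) from the workspace using the same pattern
    for (int p = 0 ; p < 2 ; p++)
    {
        Cx [p] = w [Ci [p]] ;
        printf ("C(%" PRId64 ",j) = %u\n", Ci [p], (unsigned) Cx [p]) ;
    }
    return (0) ;
}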
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint32.c new file mode 100644 index 0000000000..a34851b3db --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_uint32 +// A'*B function: GB_AdotB__max_isge_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
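// The MERGE macro used in the GB_AdotB kernels above folds one matching pair
// into the running dot product: the first match creates cij, and later matches
// accumulate with IMAX.  The same logic written as a plain function, as a
// reading aid rather than a drop-in replacement for the macro:

#include <inttypes.h>
#include <stdio.h>
#include <stdbool.h>

#define EXAMPLE_IMAX(x,y) (((x) > (y)) ? (x) : (y))

// fold one matching pair (aki, bkj) into the running dot product, mirroring
// what MERGE does for the MAX_ISGE semiring
static void merge_step (uint32_t aki, uint32_t bkj,
    bool *cij_exists, uint32_t *cij)
{
    uint32_t t = (aki >= bkj) ;             // ISGE multiply: 0 or 1
    if (*cij_exists)
    {
        *cij = EXAMPLE_IMAX (*cij, t) ;     // later match: MAX accumulate
    }
    else
    {
        *cij_exists = true ;                // first match creates the entry
        *cij = t ;
    }
}

int main (void)
{
    bool cij_exists = false ;
    uint32_t cij = 0 ;
    merge_step (3, 9, &cij_exists, &cij) ;  // t = 0, creates cij = 0
    merge_step (8, 2, &cij_exists, &cij) ;  // t = 1, cij becomes 1
    if (cij_exists) printf ("cij = %u\n", (unsigned) cij) ;   // prints 1
    return (0) ;
}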
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint64.c new file mode 100644 index 0000000000..1f77ae7adf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_uint64 +// A'*B function: GB_AdotB__max_isge_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
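// In the WITH_ZOMBIES variant above, an entry that is present in the Mask but
// not produced by A*B is kept in C as a "zombie": its row index is stored
// through FLIP and C->nzombies is incremented, so later phases can squeeze
// the zombies out without reshaping C here.  A standalone sketch of counting
// and recovering flipped indices follows; EXAMPLE_FLIP is only an illustrative
// stand-in for the FLIP macro defined in GB.h.

#include <inttypes.h>
#include <stdio.h>

// illustrative stand-in: a self-inverse map from nonnegative row indices to
// strictly negative ones
#define EXAMPLE_FLIP(i) (-(i) - 2)

int main (void)
{
    // row indices of one column of C, where rows 3 and 9 are zombies
    // (present in the Mask pattern but not produced by A*B)
    int64_t Ci [5] = { 1, EXAMPLE_FLIP (3), 4, EXAMPLE_FLIP (9), 12 } ;
    int64_t nzombies = 0 ;

    // count the zombies and recover their original row indices
    for (int p = 0 ; p < 5 ; p++)
    {
        if (Ci [p] < 0)
        {
            nzombies++ ;
            printf ("zombie at row %" PRId64 "\n", EXAMPLE_FLIP (Ci [p])) ;
        }
        else
        {
            printf ("live entry at row %" PRId64 "\n", Ci [p]) ;
        }
    }
    printf ("nzombies = %" PRId64 "\n", nzombies) ;   // prints 2
    return (0) ;
}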
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint8.c new file mode 100644 index 0000000000..71b59dc11d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isge_uint8 +// A'*B function: GB_AdotB__max_isge_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
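+    // Note: this is the saxpy-style gather/scatter method.  w is a dense
+    // accumulator holding the partial column C(:,j) in the monoid's type,
+    // and (in the masked phase below) Flag records which rows of Mask(:,j)
+    // are present and which have already been accumulated into w.  The
+    // caller is assumed to have sized GB_thread_local.Work to hold at least
+    // A->nrows entries of this type.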
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
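+    // Note: a zombie is an entry that appears in the Mask pattern but not in
+    // A*B.  Its row index is stored flipped (FLIP (i), a negative value) so
+    // it can be pruned later; a matrix holding zombies is presumably placed
+    // in the global queue so that the pending work can be finished before
+    // the matrix is used again.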
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // 
log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp32.c new file mode 100644 index 0000000000..b7377b07d3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_fp32 +// A'*B function: GB_AdotB__max_isgt_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
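+    // Note: the ISGT multiply (aik > bkj) yields 0 or 1, stored here as a
+    // float, and the additive monoid is MAX with identity -INFINITY, applied
+    // via FMAX.  w therefore holds float partial maxima for the column
+    // C(:,j) currently being computed.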
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
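+    // Note: two strategies are compiled for the masked case.  With
+    // WITH_ZOMBIES defined, C adopts the Mask pattern verbatim and entries
+    // of the Mask not present in A*B become zombies; without it, live
+    // entries are compacted into Cp, Ci, and Cx on the fly and no zombies
+    // are created.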
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik > bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki > bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp64.c new file mode 100644 index 0000000000..5f6c3dba3f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_fp64 +// A'*B function: GB_AdotB__max_isgt_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
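+    // Note: for this MAX_ISGT semiring, C(i,j) = max over k of
+    // (A(i,k) > B(k,j)), so each surviving entry ends up as 1 if some k in
+    // the pattern satisfies A(i,k) > B(k,j) and 0 otherwise; every entry
+    // kept in the pattern receives at least one term, so the -INFINITY
+    // identity does not appear in the output.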
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik > bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki > bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int16.c new file mode 100644 index 0000000000..d8cca8049e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_int16 +// A'*B function: GB_AdotB__max_isgt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
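+    // Note: as used below, empty (Xp, Xi, j, &lo, &hi) appears to report
+    // whether column j has no entries and to return its first and last row
+    // indices, and scatter_mask expands Mask(:,j) into the Flag workspace
+    // the first time a contribution to column j survives the range test;
+    // both are assumed to be defined in GB_AxB_methods.h.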
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int32.c new file mode 100644 index 0000000000..fe97278230 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_int32 +// A'*B function: GB_AdotB__max_isgt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
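+    // Note: w need not be initialized on entry.  In the masked phase each
+    // w [i] is written before it is read (the Flag value guards the first
+    // write), and in the unmasked phase w is cleared over the pattern of
+    // C(:,j) before the column is computed.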
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int64.c new file mode 100644 index 0000000000..641a4fcc68 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_int64 +// A'*B function: GB_AdotB__max_isgt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
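// Aside (not part of the generated file): the semiring described in the
// header above can be illustrated with a minimal scalar sketch of one
// multiply-add step.  It assumes only that IMAX(x,y) is the ordinary integer
// maximum, which is how the kernel uses it; the function name is hypothetical.
#include <stdint.h>

static inline int64_t max_isgt_int64_step (int64_t cij, int64_t aik, int64_t bkj)
{
    int64_t t = (aik > bkj) ;          // multiply: ISGT gives 0 or 1
    return (cij > t) ? cij : t ;       // add: MAX, with identity INT64_MIN
}

// Example: starting from the identity INT64_MIN and accumulating the pairs
// (aik,bkj) = (3,1) then (2,5) gives max (INT64_MIN, 1, 0) == 1.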
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int8.c new file mode 100644 index 0000000000..cf96559d4d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_int8 +// A'*B function: GB_AdotB__max_isgt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
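// Aside (hypothetical helper, not the generated kernel): the unmasked branch
// below follows a Gustavson-style pattern.  For each column j, the dense
// workspace w is cleared to the identity over the precomputed pattern of
// C(:,j), each A(i,k) is combined with B(k,j) using the ISGT multiply and
// MAX add, and w is then gathered back into Cx.  The array names mirror the
// kernel's CSC arrays; this sketch is a standalone illustration only.
#include <stdint.h>

static void axb_column_sketch
(
    int64_t j,
    const int64_t *Ap, const int64_t *Ai, const int8_t *Axv,
    const int64_t *Bp, const int64_t *Bi, const int8_t *Bxv,
    const int64_t *Cp, const int64_t *Civ, int8_t *Cxv,
    int8_t *w                               // dense workspace, one entry per row of C
)
{
    // clear w over the pattern of C(:,j)
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Civ [p]] = INT8_MIN ;
    // accumulate A(:,k) scaled by B(k,j) for each entry B(k,j)
    for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    {
        int64_t k = Bi [p] ;
        int8_t bkj = Bxv [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;
            int8_t t = (int8_t) (Axv [pa] > bkj) ;      // ISGT multiply
            w [i] = (w [i] > t) ? w [i] : t ;           // MAX add
        }
    }
    // gather the finished column back into Cx
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cxv [p] = w [Civ [p]] ;
}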
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint16.c new file mode 100644 index 0000000000..bc158d4f1a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_uint16 +// A'*B function: GB_AdotB__max_isgt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
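// Aside (hypothetical helper, not the generated code): the A'*B dot-product
// kernel below, in the balanced case, walks the index lists of A(:,i) and
// B(:,j) with two pointers and applies the MERGE step whenever the row
// indices match.  This standalone sketch shows that merge for uint16_t.
#include <stdint.h>
#include <stdbool.h>

static bool dot_merge_sketch
(
    const int64_t *Ai, const uint16_t *Axv, int64_t pa, int64_t pa_end,
    const int64_t *Bi, const uint16_t *Bxv, int64_t pb, int64_t pb_end,
    uint16_t *cij_out                   // defined only if the result is true
)
{
    bool cij_exists = false ;
    uint16_t cij = 0 ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if      (ia < ib) pa++ ;        // A(ia,i) has no matching B entry
        else if (ib < ia) pb++ ;        // B(ib,j) has no matching A entry
        else
        {
            uint16_t t = (uint16_t) (Axv [pa++] > Bxv [pb++]) ;   // ISGT multiply
            cij = cij_exists ? ((cij > t) ? cij : t) : t ;        // MAX add
            cij_exists = true ;
        }
    }
    if (cij_exists) *cij_out = cij ;
    return cij_exists ;                 // false: C(i,j) is not in the pattern
}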
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint32.c new file mode 100644 index 0000000000..81d92ed57d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_uint32 +// A'*B function: GB_AdotB__max_isgt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
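// Aside: when one column is far denser than the other (the 32x tests in the
// dot-product kernel below), the code skips ahead in the longer index list
// with GB_BINARY_TRIM_SEARCH rather than advancing one entry at a time.  The
// sketch below is a hedged illustration of that idea, not necessarily
// identical to the macro: it returns the first position in Ai [pleft..pright-1]
// whose row index is >= target.
#include <stdint.h>

static int64_t trim_search_sketch
(
    int64_t target,
    const int64_t *Ai,      // ascending row indices of the sparse column
    int64_t pleft,          // first candidate position
    int64_t pright          // one past the last candidate position
)
{
    while (pleft < pright)
    {
        int64_t pmid = pleft + (pright - pleft) / 2 ;
        if (Ai [pmid] < target) pleft = pmid + 1 ; else pright = pmid ;
    }
    return pleft ;          // Ai [pleft] >= target, or pleft == pright if none
}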
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint64.c new file mode 100644 index 0000000000..b1e08f5d01 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_uint64 +// A'*B function: GB_AdotB__max_isgt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
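// Aside (hypothetical helper, not the kernel's scatter_mask): in the masked
// branch below, Mask(:,j) is scattered into a dense int8_t Flag array so the
// innermost loop can test Mask(i,j) in O(1).  This sketch assumes the mask
// values are already boolean; the real kernel casts them via a
// GB_cast_function obtained from GB_cast_factory.
#include <stdint.h>
#include <stdbool.h>

static void scatter_mask_sketch
(
    int64_t j,
    const int64_t *Maskp, const int64_t *Maski, const bool *Maskxb,
    int8_t *Flag            // dense, one entry per row, assumed all zero on entry
)
{
    for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++)
    {
        if (Maskxb [p]) Flag [Maski [p]] = 1 ;      // Mask(i,j) is true
    }
    // after the column is finished, the kernel resets the touched Flag
    // entries to zero so the workspace can be reused without a full memset
}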
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint8.c new file mode 100644 index 0000000000..dfcc0c0f3d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isgt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isgt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isgt_uint8 +// A'*B function: GB_AdotB__max_isgt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_fp32.c new file mode 100644 index 0000000000..7db2a02fb6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_fp32 +// A'*B function: GB_AdotB__max_isle_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
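+    // w is the dense accumulator for one column C(:,j).  The unmasked branch
+    // below clears it to the additive identity (-INFINITY for the MAX monoid)
+    // over the precomputed pattern of C(:,j); the masked branch instead writes
+    // w [i] the first time C(i,j) is seen, so no global initialization is needed.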
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik <= bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_fp64.c new file mode 100644 index 0000000000..21ae933611 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_fp64 +// A'*B function: GB_AdotB__max_isle_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik <= bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int16.c new file mode 100644 index 0000000000..19045b5103 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_int16 +// A'*B function: GB_AdotB__max_isle_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
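+        // Finish the masked case: in the WITH_ZOMBIES variant C already uses the
+        // Mask pattern (Cp was copied from Maskp above) and is placed in the
+        // queue if it holds zombies, so they can be pruned later; otherwise the
+        // last column pointer is closed with Cp [n] = cnz.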
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int32.c new file mode 100644 index 0000000000..d8baec1340 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_int32 +// A'*B function: GB_AdotB__max_isle_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
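+    // A small worked example of the MAX_ISLE semiring used throughout this
+    // kernel (illustrative only; the operators are exactly those listed in
+    // the header comments above): the "multiply" is the comparison
+    // t = (aik <= bkj), which yields 0 or 1, and the "add" is
+    // cij = IMAX (cij,t).  For instance, with aik = 3 and bkj = 5, t = 1 and
+    // cij becomes IMAX (cij,1); with aik = 7 and bkj = 2, t = 0 and cij is
+    // unchanged whenever cij >= 0.  Starting from the identity INT32_MIN,
+    // cij therefore ends up as 1 if any aik <= bkj among the entries that
+    // are multiplied, and 0 otherwise.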
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
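+        // Note on zombies: in the masked path above, a "zombie" is an entry
+        // that appears in the pattern of the Mask but not in A*B.  Its row
+        // index is stored flipped, as FLIP (i), its value is set to the
+        // identity, and C->nzombies is incremented.  Placing C in the queue
+        // below records that C still carries this pending work, so that the
+        // zombies can be deleted later.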
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int64.c new file mode 100644 index 0000000000..ddd0eafad7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_int64 +// A'*B function: GB_AdotB__max_isle_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
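+    // In the masked path below, Flag [i] records the status of row i within
+    // the current column j: 0 means Mask (i,j) is not present (the entry is
+    // skipped), a positive value means Mask (i,j) is true but C(i,j) has not
+    // been computed yet, and -1 means C(i,j) has already been seen, with its
+    // current value held in the workspace w [i].  The gather step resets
+    // Flag back to zero so it can be reused for the next column.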
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int8.c new file mode 100644 index 0000000000..c0f32bbda9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_int8 +// A'*B function: GB_AdotB__max_isle_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint16.c new file mode 100644 index 0000000000..433039b820 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_uint16 +// A'*B function: GB_AdotB__max_isle_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint32.c new file mode 100644 index 0000000000..9c061d371c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_uint32 +// A'*B function: GB_AdotB__max_isle_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint64.c new file mode 100644 index 0000000000..e5a910dddb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_uint64 +// A'*B function: GB_AdotB__max_isle_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
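    // Illustrative sketch only (not part of the generated kernel): the MAX_ISLE
    // "arithmetic" used here multiplies by comparing, t = (aik <= bkj), which
    // yields 0 or 1, and adds with IMAX, so an accumulated entry becomes 1 as
    // soon as any product contributes a 1.  With made-up scalar values:
    //
    //      uint64_t cij = 0 ;                  // the additive identity
    //      uint64_t aik = 7, bkj = 3 ;
    //      uint64_t t = (aik <= bkj) ;         // "multiply": t is 0 here
    //      cij = IMAX (cij, t) ;               // "add": cij stays 0
    //      aik = 2 ; bkj = 5 ;
    //      t = (aik <= bkj) ;                  // now t is 1
    //      cij = IMAX (cij, t) ;               // cij becomes 1 and stays 1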
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint8.c new file mode 100644 index 0000000000..98a88f4003 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isle_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isle_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isle_uint8 +// A'*B function: GB_AdotB__max_isle_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
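    // Illustrative sketch only (not part of the generated kernel): the
    // unmasked branch below is a Gustavson-style outer product.  For each
    // column j, the workspace w acts as a dense accumulator indexed by row:
    // it is cleared at the positions in the pattern of C(:,j), each A(i,k)
    // combined with B(k,j) via the ISLE comparison is scattered into it, and
    // the finished column is gathered back into the sparse result.  Condensed:
    //
    //      for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = 0 ;
    //      for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    //      {
    //          int64_t k = Bi [p] ;
    //          for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
    //          {
    //              w [Ai [pa]] = IMAX (w [Ai [pa]], (uint8_t) (Ax [pa] <= Bx [p])) ;
    //          }
    //      }
    //      for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;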
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // 
log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_fp32.c new file mode 100644 index 0000000000..e6f6e6b1b5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_fp32 +// A'*B function: GB_AdotB__max_islt_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
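    // Illustrative sketch only (not part of the generated kernel): in this
    // floating-point instance the additive identity is -INFINITY and the "add"
    // is FMAX, so folding in the identity never changes a result, while the
    // "multiply" aik < bkj is a C comparison whose 0-or-1 result is promoted
    // to 0.0f or 1.0f.  With made-up values:
    //
    //      float cij = -INFINITY ;             // identity of MAX over floats
    //      float aik = 1.5f, bkj = 2.0f ;
    //      float t = (aik < bkj) ;             // comparison, promoted to 1.0f
    //      cij = FMAX (cij, t) ;               // FMAX (-INFINITY, 1.0f) is 1.0f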
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik < bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki < bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_fp64.c new file mode 100644 index 0000000000..c322db5b7d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_fp64 +// A'*B function: GB_AdotB__max_islt_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
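    // Illustrative sketch only (not part of the generated kernel): the
    // GB_AdotB kernel later in this file forms each C(i,j) by intersecting the
    // sorted row-index lists of A(:,i) and B(:,j).  Lists of similar length
    // are walked with a two-pointer merge; if one list is more than 32 times
    // longer, the kernel instead jumps ahead in it with GB_BINARY_TRIM_SEARCH.
    // A bare-bones two-pointer merge over sorted index arrays I1 [0..n1-1] and
    // I2 [0..n2-1] (hypothetical names) looks like:
    //
    //      int64_t p1 = 0, p2 = 0 ;
    //      while (p1 < n1 && p2 < n2)
    //      {
    //          if      (I1 [p1] < I2 [p2]) p1++ ;      // index only in the first list
    //          else if (I2 [p2] < I1 [p1]) p2++ ;      // index only in the second list
    //          else { /* match: combine the two entries */ p1++ ; p2++ ; }
    //      }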
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik < bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki < bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int16.c new file mode 100644 index 0000000000..d4e864f47c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_int16 +// A'*B function: GB_AdotB__max_islt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
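    // Illustrative sketch only (not part of the generated kernel): when a Mask
    // is present, the masked branch below scatters Mask(:,j) into the Flag
    // workspace and, in the WITH_ZOMBIES variant, keeps the Mask's pattern as
    // the pattern of C.  Mask entries that receive no contribution from A*B
    // become "zombies": their row index is stored negated via FLIP so they can
    // be pruned later, C->nzombies is incremented, and C is handed to
    // GB_queue_insert.  A toy involutive flip (hypothetical; the library's
    // actual FLIP macro may differ) behaves like this:
    //
    //      #define TOY_FLIP(i) (-(i) - 2)      // 0,1,2,... map to -2,-3,-4,...
    //      int64_t i = 5 ;
    //      int64_t zombie = TOY_FLIP (i) ;     // -7: recognizably "not alive"
    //      int64_t alive  = TOY_FLIP (zombie) ;// back to 5, the original index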
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int32.c new file mode 100644 index 0000000000..004a0949d7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_int32 +// A'*B function: GB_AdotB__max_islt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
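+ // A minimal worked example of the MAX_ISLT semiring described above
+ // (illustrative comment only; the values are assumed, not taken from the
+ // template): starting from the additive identity INT32_MIN and folding in
+ // the pairs (2,5), (7,3), and (4,4):
+ //     cij = INT32_MIN ;                 // additive identity
+ //     cij = IMAX (cij, (2 < 5)) ;       // multiply ISLT gives 1, cij = 1
+ //     cij = IMAX (cij, (7 < 3)) ;       // gives 0, cij stays 1
+ //     cij = IMAX (cij, (4 < 4)) ;       // gives 0, so cij == 1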
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int64.c new file mode 100644 index 0000000000..1382acbd76 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_int64 +// A'*B function: GB_AdotB__max_islt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
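+ // Outline of the companion dot-product method GB_AdotB__max_islt_int64
+ // defined later in this file (descriptive comment only): for each C(i,j)
+ // it selects a kernel by comparing ainz = nnz (A (:,i)) and
+ // bjnz = nnz (B (:,j)) with nrows: both vectors dense, only A(:,i) dense,
+ // only B(:,j) dense, one vector more than 32 times sparser than the other
+ // (leap through the denser vector with GB_BINARY_TRIM_SEARCH), or a plain
+ // two-pointer merge when the sparsities are comparable.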
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int8.c new file mode 100644 index 0000000000..fbfe5a69a4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_int8 +// A'*B function: GB_AdotB__max_islt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint16.c new file mode 100644 index 0000000000..3266b48c48 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_uint16 +// A'*B function: GB_AdotB__max_islt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
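+ // Note on the unsigned kernels (illustrative comment; the example values
+ // are assumed): the additive identity is 0 here rather than a signed
+ // minimum, and because the ISLT multiplier only produces 0 or 1, each
+ // result reduces to 1 if any contributing pair has aik < bkj, and to 0
+ // otherwise. For example, the pairs (5,2) and (3,3) give
+ //     IMAX (IMAX (0, (5 < 2)), (3 < 3)) == 0.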
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint32.c new file mode 100644 index 0000000000..cbaba1468e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_uint32 +// A'*B function: GB_AdotB__max_islt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
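+ // w (GB_thread_local.Work) is used below as a dense gather/scatter
+ // workspace: w [i] accumulates the value of C(i,j) while B(:,j) is
+ // scanned, and is then gathered into Cx over the pattern of C(:,j).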
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint64.c new file mode 100644 index 0000000000..63cb6fe4fe --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_uint64 +// A'*B function: GB_AdotB__max_islt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
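+ // For this MAX_ISLT semiring the multiply t = (aik < bkj) is either 0 or
+ // 1, and IMAX keeps the largest t seen, so an entry in the computed
+ // pattern of C is 1 when some matching pair has A(i,k) < B(k,j), and 0
+ // otherwise.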
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint8.c new file mode 100644 index 0000000000..d925c814d5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_islt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_islt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_islt_uint8 +// A'*B function: GB_AdotB__max_islt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
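+ // Two cases follow: if a Mask is present its pattern is taken as the
+ // pattern of C (Mask entries not produced by A*B become zombies or are
+ // dropped), and otherwise the pattern of C has already been computed by
+ // GB_AxB_symbolic.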
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_fp32.c new file mode 100644 index 0000000000..f1e485fd86 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_fp32 +// A'*B function: GB_AdotB__max_isne_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
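+ // The additive identity of the FMAX monoid is -INFINITY, so in the
+ // unmasked case below w is cleared to -INFINITY, not zero, before each
+ // column C(:,j) is accumulated.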
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik != bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki != bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_fp64.c new file mode 100644 index 0000000000..816596f091 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_fp64 +// A'*B function: GB_AdotB__max_isne_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
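+ // The ISNE multiply t = (aik != bkj) yields 0 or 1 (as a double), and
+ // FMAX accumulates the largest t, so an entry in the computed pattern of
+ // C is 1 when any matching pair of entries of A and B differs.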
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik != bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki != bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int16.c new file mode 100644 index 0000000000..a74677d5b6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_int16 +// A'*B function: GB_AdotB__max_isne_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
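+    // Note (descriptive comment, not part of the generated template): w is a
+    // dense workspace with one slot per row of C. For each column j, the
+    // kernel below scatters the partial products of A * B(:,j) into w
+    // (a saxpy-style outer-product sweep over the entries of B(:,j)) and then
+    // gathers the finished column back into the pattern and values of C.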
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int32.c new file mode 100644 index 0000000000..0ce3cf6daa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_int32 +// A'*B function: GB_AdotB__max_isne_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int64.c new file mode 100644 index 0000000000..a71f9ef015 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_int64 +// A'*B function: GB_AdotB__max_isne_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
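+    // Note (descriptive comment, not part of the generated template): when a
+    // Mask is present and WITH_ZOMBIES is defined, C adopts the pattern of the
+    // Mask; Mask entries that do not appear in A*B are stored as zombies
+    // (row index FLIP (i)), and C is placed in the queue of matrices with
+    // pending work so the zombies can be deleted later.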
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int8.c new file mode 100644 index 0000000000..9e8dff76bb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_int8 +// A'*B function: GB_AdotB__max_isne_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint16.c new file mode 100644 index 0000000000..defe1d48b8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_uint16 +// A'*B function: GB_AdotB__max_isne_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint32.c new file mode 100644 index 0000000000..bc39429fd3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_uint32 +// A'*B function: GB_AdotB__max_isne_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
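The GB_AdotB dot-product kernels above intersect the sorted index lists of A(:,i) and B(:,j); when the two columns have similar sparsity they use the plain two-pointer merge expanded by the MERGE macro. The following is a minimal sketch of that merge for the max_isne case, with the two sparse columns passed in as plain index/value arrays (an assumption made only to keep the example self-contained).

    // editorial sketch of the two-pointer merge dot product used when
    // A(:,i) and B(:,j) have similar sparsity; indices sorted ascending
    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define IMAX(x,y) (((x) > (y)) ? (x) : (y))

    // cij = max over shared row indices of (Ax != Bx); returns false if the
    // two patterns do not intersect (C(i,j) is then not created)
    static bool dot_max_isne (const int64_t *Ai, const uint16_t *Ax, int64_t anz,
                              const int64_t *Bi, const uint16_t *Bx, int64_t bnz,
                              uint16_t *cij)
    {
        bool cij_exists = false ;
        int64_t pa = 0, pb = 0 ;
        while (pa < anz && pb < bnz)
        {
            int64_t ia = Ai [pa], ib = Bi [pb] ;
            if      (ia < ib) pa++ ;        // A entry has no match in B
            else if (ib < ia) pb++ ;        // B entry has no match in A
            else                            // ia == ib: a matching pair
            {
                uint16_t t = (uint16_t) (Ax [pa] != Bx [pb]) ;
                *cij = cij_exists ? IMAX (*cij, t) : t ;
                cij_exists = true ;
                pa++ ; pb++ ;
            }
        }
        return (cij_exists) ;
    }

    int main (void)
    {
        int64_t Ai [3] = { 0, 2, 5 } ; uint16_t Ax [3] = { 7, 8, 9 } ;
        int64_t Bi [3] = { 2, 3, 5 } ; uint16_t Bx [3] = { 8, 1, 4 } ;
        uint16_t cij ;
        if (dot_max_isne (Ai, Ax, 3, Bi, Bx, 3, &cij))
        {
            // rows 2 and 5 intersect; the values differ at row 5, so prints 1
            printf ("cij = %u\n", (unsigned) cij) ;
        }
        return (0) ;
    }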
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint64.c new file mode 100644 index 0000000000..b8fc232111 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_uint64 +// A'*B function: GB_AdotB__max_isne_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
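When one of the two columns has more than 32 times the entries of the other, the GB_AdotB kernels above switch from the symmetric merge to a search-based skip: the sparse side is walked entry by entry and the dense side is advanced with GB_BINARY_TRIM_SEARCH. The sketch below illustrates the idea with an ordinary lower-bound binary search standing in for that macro, whose exact interface is not reproduced here.

    // editorial sketch of the asymmetric merge taken when one pattern is at
    // least 32x denser than the other: skip ahead in the dense side with a
    // binary search instead of advancing one entry at a time
    #include <stdint.h>
    #include <stdio.h>

    // first position p in [lo,hi) with I[p] >= target (I sorted ascending)
    static int64_t lower_bound (const int64_t *I, int64_t lo, int64_t hi,
                                int64_t target)
    {
        while (lo < hi)
        {
            int64_t mid = lo + (hi - lo) / 2 ;
            if (I [mid] < target) lo = mid + 1 ; else hi = mid ;
        }
        return (lo) ;
    }

    int main (void)
    {
        // dense-ish pattern Ai, very sparse pattern Bi
        int64_t Ai [8] = { 0, 1, 2, 3, 4, 5, 6, 7 } ;
        int64_t Bi [2] = { 3, 7 } ;
        int64_t pa = 0, matches = 0 ;
        for (int64_t pb = 0 ; pb < 2 ; pb++)
        {
            pa = lower_bound (Ai, pa, 8, Bi [pb]) ;   // skip A entries < Bi[pb]
            if (pa < 8 && Ai [pa] == Bi [pb]) { matches++ ; pa++ ; }
        }
        printf ("matching row indices: %lld\n", (long long) matches) ;   // 2
        return (0) ;
    }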
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; 
+ Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint8.c new file mode 100644 index 0000000000..2138527669 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_isne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_isne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_isne_uint8 +// A'*B function: GB_AdotB__max_isne_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
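The masked outer-product branch above scatters Mask(:,j) into the byte array Flag, accumulates w [i] only where Flag [i] is nonzero, marks an entry as seen by setting Flag [i] to -1, and then gathers C(:,j) from the mask pattern while resetting Flag for the next column. A reduced single-column sketch of that pattern, assuming a boolean mask:

    // editorial sketch of the Flag-based mask scatter/gather, one column
    #include <stdint.h>
    #include <stdio.h>

    #define IMAX(x,y) (((x) > (y)) ? (x) : (y))

    int main (void)
    {
        enum { NROWS = 6 } ;
        int8_t   Flag [NROWS] = { 0 } ;
        uint16_t w    [NROWS] ;                     // workspace, uninitialized

        // scatter a mask column with entries in rows 1 and 4
        int64_t Maski [2] = { 1, 4 } ;
        for (int p = 0 ; p < 2 ; p++) Flag [Maski [p]] = 1 ;

        // accumulate three updates; only masked rows take effect
        int64_t  rows [3] = { 1, 3, 4 } ;
        uint16_t vals [3] = { 1, 1, 0 } ;
        for (int p = 0 ; p < 3 ; p++)
        {
            int64_t i = rows [p] ;
            if (Flag [i] == 0) continue ;                       // not in Mask(:,j)
            if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = vals [p] ; }  // first time
            else              { w [i] = IMAX (w [i], vals [p]) ; }    // update
        }

        // gather: only rows whose Flag went negative hold live entries
        for (int p = 0 ; p < 2 ; p++)
        {
            int64_t i = Maski [p] ;
            if (Flag [i] < 0)
            {
                printf ("C(%lld,j) = %u\n", (long long) i, (unsigned) w [i]) ;
            }
            Flag [i] = 0 ;                          // reset for the next column
        }
        return (0) ;
    }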
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // 
log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_fp32.c new file mode 100644 index 0000000000..f6626e8679 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_fp32 +// A'*B function: GB_AdotB__max_land_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
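The floating-point variant above differs from the integer kernels only in its scalar operations: the multiply is a logical AND of nonzero tests (producing 0 or 1 as a float) and the add is FMAX, whose identity is -INFINITY, so FMAX (cij,-INFINITY) leaves cij unchanged. A minimal sketch, assuming FMAX behaves like fmaxf:

    // editorial sketch of the max_land_fp32 scalar semiring
    #include <math.h>
    #include <stdio.h>

    #define FMAX(x,y) fmaxf ((x), (y))

    int main (void)
    {
        float a [3] = { 0.0f, 2.5f, 0.0f } ;
        float b [3] = { 4.0f, 1.0f, 0.0f } ;
        float cij = -INFINITY ;                         // MAX identity
        for (int k = 0 ; k < 3 ; k++)
        {
            float t = (a [k] != 0) && (b [k] != 0) ;    // LAND multiply: 0 or 1
            cij = FMAX (cij, t) ;                       // MAX add
        }
        printf ("cij = %g\n", cij) ;                    // prints 1: k=1 overlaps
        return (0) ;
    }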
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_fp64.c new file mode 100644 index 0000000000..95edd8de7a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_fp64 +// A'*B function: GB_AdotB__max_land_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
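    // [expository sketch, added for clarity; not generated code] Per entry,
    // this MAX_LAND_FP64 semiring applies, in effect:
    //
    //      double t = (aik != 0) && (bkj != 0) ;  // LAND "multiply": 0 or 1
    //      cij = FMAX (cij, t) ;                  // MAX monoid "add"
    //
    // starting from the monoid identity -INFINITY, so an entry of C in the
    // pattern ends up 1 if any of its contributing A and B entry pairs are
    // both nonzero, and 0 otherwise.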
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
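    // [expository note, added for clarity] Two variants of this masked phase
    // come out of the template: with WITH_ZOMBIES defined, C inherits the
    // Mask pattern verbatim (Maskp was copied into C->p above) and Mask
    // entries with no corresponding entry in A*B are kept as zombies, with
    // Ci [p] = FLIP (i) and C->nzombies incremented, to be pruned later;
    // without it, Cp is built incrementally and only live entries are kept,
    // so no zombies are created and no queue insertion is needed.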
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } 
+ } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_int16.c new file mode 100644 index 0000000000..fcd91ea7bf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_int16 +// A'*B function: GB_AdotB__max_land_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
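    // [expository sketch, added for clarity; not generated code] The unmasked
    // phase of this kernel is a saxpy-style, column-at-a-time product over
    // the pattern produced by GB_AxB_symbolic; roughly:
    //
    //      for each column j:
    //          w [Ci [p]] = INT16_MIN for all p in Cp [j] .. Cp [j+1]-1 ;
    //          for each k with B(k,j) present:
    //              for each i with A(i,k) present:
    //                  w [i] = IMAX (w [i], (A(i,k) != 0) && (B(k,j) != 0)) ;
    //          Cx [p] = w [Ci [p]] for all p in Cp [j] .. Cp [j+1]-1 ;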
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_int32.c new file mode 100644 index 0000000000..241f41f9bf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_int32 +// A'*B function: GB_AdotB__max_land_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
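    // [expository note, added for clarity] In the masked phase below, the
    // Flag workspace encodes the state of row i within the current column j:
    // 0 means Mask(i,j) is not set (the entry is skipped), a positive value
    // means Mask(i,j) is true but C(i,j) has not yet been computed, and -1
    // means C(i,j) has been computed and its value is waiting in w [i]; the
    // gather pass then resets Flag back to 0 for the next column.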
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_int64.c new file mode 100644 index 0000000000..7b8e2b6ec0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_int64 +// A'*B function: GB_AdotB__max_land_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
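    // [usage sketch, added for clarity; identifiers are assumptions] A kernel
    // like this one is dispatched internally when GrB_mxm is called with the
    // matching built-in semiring; assuming that semiring object is named
    // GxB_MAX_LAND_INT64, a caller would do something like:
    //
    //      GrB_Matrix C ;
    //      GrB_Matrix_new (&C, GrB_INT64, m, n) ;
    //      GrB_mxm (C, NULL, NULL, GxB_MAX_LAND_INT64, A, B, NULL) ;
    //
    // passing a non-NULL Mask instead of the first NULL selects the masked
    // phase of this kernel.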
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_int8.c new file mode 100644 index 0000000000..93ab944342 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_int8 +// A'*B function: GB_AdotB__max_land_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
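    // [expository note, added for clarity] The companion dot-product kernel
    // later in this file, GB_AdotB__max_land_int8, computes each C(i,j) as
    // A(:,i)'*B(:,j) and picks one of five strategies per entry: both vectors
    // dense, A(:,i) dense with B(:,j) sparse, B(:,j) dense with A(:,i)
    // sparse, one vector more than 32 times sparser than the other (skipping
    // ahead with GB_BINARY_TRIM_SEARCH), or a plain two-pointer merge via the
    // MERGE macro when their sparsity is comparable.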
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint16.c new file mode 100644 index 0000000000..78530f89bf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_uint16 +// A'*B function: GB_AdotB__max_land_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
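+    // A minimal worked example of the MAX_LAND semiring used in this kernel
+    // (the numbers are illustrative, not taken from any particular matrix):
+    // the multiply t = (aik != 0) && (bkj != 0) yields 1 exactly when both
+    // stored entries are nonzero, and the add cij = IMAX (cij,t) keeps the
+    // largest t seen so far.  If A(i,k) = 7 and B(k,j) = 5 for some k, then
+    // t = 1 and cij becomes 1; an explicit zero such as A(i,k') = 0
+    // contributes t = 0, and IMAX (cij,0) leaves cij unchanged, which is why
+    // 0 is the identity of this monoid.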
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint32.c new file mode 100644 index 0000000000..013200365a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_uint32 +// A'*B function: GB_AdotB__max_land_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
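+    // Convention for the Flag workspace in the masked phase below (a summary
+    // of how this generated code uses the scatter_mask helper included from
+    // GB_AxB_methods.h): Flag [i] == 0 means Mask (i,j) is not present or is
+    // false, so the product for row i is skipped; Flag [i] > 0 means
+    // Mask (i,j) is true but C(i,j) has not been computed yet; Flag [i] == -1
+    // means C(i,j) has been seen and its running value is held in w [i].  The
+    // gather step resets the touched entries of Flag to 0 so the workspace
+    // can be reused for the next column without a full clear.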
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint64.c new file mode 100644 index 0000000000..bace9a052f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_uint64 +// A'*B function: GB_AdotB__max_land_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
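+    // How the workspace w is used when no Mask is present (the second branch
+    // below): the pattern of C has already been computed by GB_AxB_symbolic,
+    // so for each column j the entries of w listed in the pattern of C(:,j)
+    // are first cleared to the identity 0, then every B(k,j) scatters A(:,k)
+    // into w with the IMAX accumulator, and finally w is gathered back into
+    // Cx in the order given by Ci.  Only rows in the pattern of C(:,j) are
+    // touched, so w never needs a full clear.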
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_land_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint8.c new file mode 100644 index 0000000000..647f23ab13 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_land_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_land_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_land_uint8 +// A'*B function: GB_AdotB__max_land_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
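+    // Note on the flip argument: it is declared but not referenced in this
+    // kernel.  A plausible reason (an observation, not a statement of the
+    // generator's intent) is that the LAND multiply (aik != 0) && (bkj != 0)
+    // is commutative, so swapping the roles of A and B in the multiply cannot
+    // change the result, and the generated code can safely ignore flip for
+    // this semiring.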
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_fp32.c new file mode 100644 index 0000000000..232e842a2b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_fp32 +// A'*B function: GB_AdotB__max_lor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
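// A minimal standalone sketch of the (MAX, LOR) reduction described in the
// header comments above, assuming nothing beyond <math.h>: each "multiply"
// yields 1 when either operand is nonzero, and fmaxf folds those products
// starting from the -INFINITY identity.  Illustrative only; it is not part
// of the generated kernel and its names are not library names.

#include <math.h>
#include <stdio.h>

// dot_max_lor: reduce t = (a [k] != 0) || (b [k] != 0) with cij = fmaxf (cij,t)
static float dot_max_lor (const float *a, const float *b, int n)
{
    float cij = -INFINITY ;                         // identity of the MAX monoid
    for (int k = 0 ; k < n ; k++)
    {
        float t = (a [k] != 0) || (b [k] != 0) ;    // LOR "multiply"
        cij = fmaxf (cij, t) ;                      // MAX "add"
    }
    return (cij) ;
}

int main (void)
{
    float a [3] = { 0, 2, 0 } ;
    float b [3] = { 0, 0, 5 } ;
    printf ("%g\n", dot_max_lor (a, b, 3)) ;        // prints 1
    return (0) ;
}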
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_fp64.c new file mode 100644 index 0000000000..178a7f1b15 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_fp64 +// A'*B function: GB_AdotB__max_lor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
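// These generated kernels are normally reached through the user-level API
// rather than called directly.  A hedged sketch, assuming the predefined
// GxB_MAX_LOR_FP64 built-in semiring and the standard GrB_mxm signature;
// error handling is omitted and the helper name is illustrative.

#include "GraphBLAS.h"

// compute C<Mask> = A*B over the (MAX, LOR) semiring via the public API
GrB_Info max_lor_mxm (GrB_Matrix C, GrB_Matrix Mask,
                      GrB_Matrix A, GrB_Matrix B)
{
    return (GrB_mxm (C, Mask, NULL, GxB_MAX_LOR_FP64, A, B, NULL)) ;
}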
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } 
+ } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int16.c new file mode 100644 index 0000000000..118c23bb12 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_int16 +// A'*B function: GB_AdotB__max_lor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
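// The identity claim in the header comment ("cij = IMAX (cij,INT16_MIN) does
// not change cij") can be checked exhaustively for int16_t.  A small
// standalone sketch; imax16 is a local stand-in for the IMAX macro defined
// elsewhere in the library, not the macro itself.

#include <assert.h>
#include <stdint.h>

static inline int16_t imax16 (int16_t x, int16_t y) { return (x > y) ? x : y ; }

int main (void)
{
    // INT16_MIN acts as the identity: folding it in never changes the value
    for (int32_t v = INT16_MIN ; v <= INT16_MAX ; v++)
    {
        assert (imax16 ((int16_t) v, INT16_MIN) == (int16_t) v) ;
    }
    return (0) ;
}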
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int32.c new file mode 100644 index 0000000000..6f98fd24cd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_int32 +// A'*B function: GB_AdotB__max_lor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
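// The masked outer-product phase below drives a small state machine in the
// Flag workspace: 1 means the row is in Mask(:,j) but C(i,j) has not been
// produced yet, -1 means w [i] already holds a partial value, and 0 means the
// row is outside the mask.  A self-contained sketch of that
// scatter/accumulate/gather cycle for one column; sizes, values, and names
// are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define N 6

int main (void)
{
    int8_t  Flag [N] = { 0 } ;
    int32_t w [N] ;

    int64_t mask_rows [ ] = { 1, 3, 4 } ;                    // pattern of Mask(:,j)
    for (int k = 0 ; k < 3 ; k++) Flag [mask_rows [k]] = 1 ; // scatter the mask

    // pretend two columns of A contribute candidate updates t [pass][i]
    int32_t t [2][N] = { { 7, 2, 9, 0, 5, 1 }, { 1, 6, 3, 4, 2, 8 } } ;
    for (int pass = 0 ; pass < 2 ; pass++)
    {
        for (int64_t i = 0 ; i < N ; i++)
        {
            if (Flag [i] == 0) continue ;                    // outside the mask
            if (Flag [i] > 0)
            {
                Flag [i] = -1 ; w [i] = t [pass][i] ;        // first time C(i,j) seen
            }
            else
            {
                w [i] = (w [i] > t [pass][i]) ? w [i] : t [pass][i] ;  // update
            }
        }
    }

    // gather: only rows with Flag [i] < 0 made it into C(:,j)
    for (int k = 0 ; k < 3 ; k++)
    {
        int64_t i = mask_rows [k] ;
        if (Flag [i] < 0) printf ("C(%ld,j) = %d\n", (long) i, w [i]) ;
        Flag [i] = 0 ;                                       // reset for the next column
    }
    return (0) ;
}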
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int64.c new file mode 100644 index 0000000000..653d07153b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_int64 +// A'*B function: GB_AdotB__max_lor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
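// In the dot-product phase below, when one pattern has more than 32 times the
// entries of the other, the kernel advances through the denser list with
// GB_BINARY_TRIM_SEARCH instead of stepping entry by entry.  A standalone
// sketch of that idea using a plain lower-bound binary search; the helper
// names are illustrative, not the library's.

#include <stdint.h>
#include <stdio.h>

// lower_bound: first position p in [lo,hi) with list [p] >= target
static int64_t lower_bound (const int64_t *list, int64_t lo, int64_t hi,
                            int64_t target)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (list [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return (lo) ;
}

// count the indices common to two sorted pattern lists, skipping ahead in the
// denser one by binary search
static int64_t intersect (const int64_t *Ai, int64_t anz,
                          const int64_t *Bi, int64_t bnz)
{
    int64_t pa = 0, pb = 0, matches = 0 ;
    while (pa < anz && pb < bnz)
    {
        if (Ai [pa] < Bi [pb])
        {
            pa = lower_bound (Ai, pa + 1, anz, Bi [pb]) ;   // discard a run of Ai
        }
        else if (Bi [pb] < Ai [pa])
        {
            pb++ ;
        }
        else { matches++ ; pa++ ; pb++ ; }                  // common index
    }
    return (matches) ;
}

int main (void)
{
    int64_t Ai [ ] = { 0, 2, 3, 5, 7, 9, 11, 12 } ;         // dense-ish pattern
    int64_t Bi [ ] = { 3, 11 } ;                            // very sparse pattern
    printf ("%ld\n", (long) intersect (Ai, 8, Bi, 2)) ;     // prints 2
    return (0) ;
}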
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int8.c new file mode 100644 index 0000000000..9674876713 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_int8 +// A'*B function: GB_AdotB__max_lor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
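+    // The outer-product (saxpy-style) method below accumulates each column
+    // C(:,j) into the dense workspace w, indexed by row.  Schematically, for
+    // each stored entry B(k,j) and each stored A(i,k) in A(:,k), the MAX_LOR
+    // semiring performs:
+    //
+    //      int8_t t = (aik != 0) || (bkj != 0) ;  // "multiply" is logical OR
+    //      w [i] = IMAX (w [i], t) ;              // "add" is integer MAX
+    //
+    // so t is 1 when either stored value is nonzero, and the accumulator
+    // keeps the largest t seen for row i.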
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
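+        // After the column loop: in the WITH_ZOMBIES variant, C is placed in
+        // the queue (GB_queue_insert) if it acquired any zombies; otherwise
+        // the final column pointer Cp [n] = cnz is recorded.  When no Mask is
+        // given, the else branch below reuses the pattern already computed by
+        // GB_AxB_symbolic: w is cleared to the identity over the pattern of
+        // C(:,j), updated by the same saxpy loop, and gathered into Cx.
+        //
+        // The companion GB_AdotB kernel further below computes C=A'*B as
+        // explicit dot products cij = A(:,i)'*B(:,j).  The jinit and cij_init
+        // helpers prepare B(:,j) and A(:,i) and return false when a column or
+        // entry can be skipped (for example, when the Mask rules it out).
+        // The two sorted index lists are merged with the MERGE macro, and
+        // when one list has more than 32 times as many entries as the other,
+        // GB_BINARY_TRIM_SEARCH skips ahead in the denser list.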
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint16.c new file mode 100644 index 0000000000..258a00d0fe --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_uint16 +// A'*B function: GB_AdotB__max_lor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
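+    // For the unsigned kernels the identity of the MAX monoid is 0 (the
+    // smallest uint16_t value), so cleared workspace entries and zombie
+    // values use 0 here, where the signed kernels above use INT8_MIN or
+    // INT64_MIN.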
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
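+        // The masked phase has two compile-time variants.  With WITH_ZOMBIES
+        // defined, C->p is a verbatim copy of Maskp (the memcpy above), so
+        // every Mask entry has a slot in C and those not produced by A*B
+        // become zombies.  Without it, Cp [j] is set as each column starts
+        // and cnz counts only the live entries that are actually kept.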
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint32.c new file mode 100644 index 0000000000..ce7ebbb9d1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_uint32 +// A'*B function: GB_AdotB__max_lor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
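+    // In the masked loop below, a column j is skipped outright when Mask(:,j)
+    // is empty, and A(:,k) is skipped when it is empty or when its row range
+    // [alo,ahi] does not overlap the row range [mlo,mhi] of Mask(:,j).
+    // Mask(:,j) is scattered into Flag only when a usable A(:,k) is first
+    // encountered; `marked` records whether that scatter happened, so an
+    // untouched column can skip the gather (or, with WITH_ZOMBIES, turn
+    // every Mask entry into a zombie).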
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
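+        // In the dot-product kernel below, fully dense columns are special
+        // cases: when A(:,i) or B(:,j) has nrows entries, its values are
+        // addressed directly by row index (Ax [pa+k] or Bx [pb+k]) and cij
+        // starts at the monoid identity, so no index merge is needed.  The
+        // general merge is used only when both columns are sparse.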
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint64.c new file mode 100644 index 0000000000..d8d1a13279 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_uint64 +// A'*B function: GB_AdotB__max_lor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
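+    // The Flag workspace encodes three states per row i while C(:,j) is
+    // computed: Flag [i] == 0 means row i is not allowed by Mask(:,j) (or
+    // has already been gathered), Flag [i] > 0 means the row is allowed but
+    // C(i,j) has not been seen yet, and Flag [i] < 0 means C(i,j) already
+    // has a value in w [i].  The gather phase resets Flag to zero so the
+    // workspace can be reused for the next column.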
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
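+        // The Mask may have any built-in type.  Here, as in the dot-product
+        // kernel below, GB_cast_factory returns a function that casts a
+        // single Mask entry of size msize to boolean, so the same generated
+        // kernel handles every mask type.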
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint8.c new file mode 100644 index 0000000000..dca9014814 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lor_uint8 +// A'*B function: GB_AdotB__max_lor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
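+    // w serves as a dense accumulator for one column of C at a time: for
+    // each column j below, w collects the contributions t = ((aik != 0) ||
+    // (bkj != 0)) for every entry B(k,j), combined with the IMAX monoid,
+    // and is then gathered into C(:,j).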
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp32.c new file mode 100644 index 0000000000..4ca21a8089 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_fp32 +// A'*B function: GB_AdotB__max_lxor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
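+    // in this kernel the multiplier is the logical XOR of the nonzero tests,
+    // t = ((aik != 0) != (bkj != 0)), stored as a float (0 or 1), and the
+    // additive monoid is FMAX with identity -INFINITY; in the unmasked phase
+    // below, w is set to -INFINITY over the pattern of C(:,j) so that the
+    // FMAX accumulation is unaffected by its initial value.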
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
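+        // all columns of C have been computed; with WITH_ZOMBIES defined, C
+        // keeps the full Mask pattern and is placed in the queue if it has
+        // any zombies, so they can be handled later; otherwise the live
+        // entries have already been compacted and only Cp [n] remains to be
+        // recorded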
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp64.c new file mode 100644 index 0000000000..ad08fc00b0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_fp64 +// A'*B function: GB_AdotB__max_lxor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
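+    // two cases follow: if a Mask is present, the pattern of the Mask
+    // becomes the pattern of C (Mask entries not found in A*B become
+    // zombies); otherwise the pattern of C was computed beforehand by
+    // GB_AxB_symbolic and only the numerical values are filled in here.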
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
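+        // the gather above resets Flag [i] to zero for every scattered
+        // entry, so the Flag workspace is all zero again when the column
+        // loop exits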
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } 
+ } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int16.c new file mode 100644 index 0000000000..901ae9253e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_int16 +// A'*B function: GB_AdotB__max_lxor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
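+    // integer variant of the kernel: the multiplier is still the logical
+    // XOR of the nonzero tests, but t is an int16_t (0 or 1), the additive
+    // monoid is IMAX, and its identity is INT16_MIN.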
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
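+            // done with column j: C(:,j) has been gathered and the Flag
+            // workspace is again all zero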
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int32.c new file mode 100644 index 0000000000..cee31325e5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_int32 +// A'*B function: GB_AdotB__max_lxor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
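+    // A and B are held in compressed-column form: Ap and Bp are column
+    // pointers, Ai and Bi are row indices, and Ax and Bx are the values;
+    // the n = C->ncols columns of C are computed one at a time below.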
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int64.c new file mode 100644 index 0000000000..7ce114bf04 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_int64 +// A'*B function: GB_AdotB__max_lxor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
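+    // As in the int32 kernel above, w accumulates one column C(:,j) at a
+    // time.  In the masked phase, Flag [i] has three states: 0 (Mask (i,j)
+    // false or not present), 1 (Mask (i,j) true but C(i,j) not yet seen),
+    // and -1 (C(i,j) already accumulated into w [i]).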
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
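+            // zombies keep the Mask pattern intact: an entry in the Mask
+            // but not in A*B stays in C with a flipped row index, FLIP (i),
+            // and is deleted later when pending work on C is done (see
+            // GB_wait)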
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int8.c new file mode 100644 index 0000000000..b69ba4c4ea --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_int8 +// A'*B function: GB_AdotB__max_lxor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
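+    // The multiplier ((aik != 0) != (bkj != 0)) only produces 0 or 1,
+    // whatever the integer type, so the IMAX "add" in effect reduces the
+    // products with a logical OR.  For example, aik = 5 and bkj = 0 give
+    // t = 1, while aik = 3 and bkj = 7 give t = 0.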
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
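+        // all columns are done.  Without WITH_ZOMBIES the columns of C were
+        // compacted as they were built, so only the final column pointer
+        // Cp [n] = cnz remains to be set; with zombies, C->p is already a
+        // copy of the Mask column pointers.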
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint16.c new file mode 100644 index 0000000000..afa4b53011 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_uint16 +// A'*B function: GB_AdotB__max_lxor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
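+    // For the unsigned kernels the additive identity is 0, so in the
+    // unmasked phase below the workspace entries for the pattern of C(:,j)
+    // are cleared to 0 instead of to the most negative integer.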
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
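+                        // only row indices present in both A(:,i) and
+                        // B(:,j) reach this point, so cij is computed over
+                        // the intersection of the two patterns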
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint32.c new file mode 100644 index 0000000000..378be11ddf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_uint32 +// A'*B function: GB_AdotB__max_lxor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
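+    // flip records that the caller swapped A and B, but it is not
+    // referenced here: the lxor multiplier is symmetric in aik and bkj, so
+    // the swap does not change the result.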
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint64.c new file mode 100644 index 0000000000..d3dbdc366a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_uint64 +// A'*B function: GB_AdotB__max_lxor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
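+    // Sketch of this semiring's arithmetic on illustrative (assumed) values:
+    // aik = 2, bkj = 0 gives t = (2 != 0) != (0 != 0) = 1 ; aik = 0, bkj = 3
+    // gives t = 1 ; aik = 5, bkj = 7 gives t = 0.  MAX-accumulating these
+    // three contributions yields cij = 1.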
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
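+        // Finalize the masked result: with WITH_ZOMBIES defined, C keeps the
+        // Mask pattern (Cp was copied from Maskp above) and Mask entries not
+        // present in A*B were recorded as zombies via FLIP (i), so C is
+        // placed in the queue below; otherwise the pattern was built column
+        // by column and cnz counts the live entries, so Cp [n] is set below.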
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init 
(kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + 
} + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint8.c new file mode 100644 index 0000000000..c30192804a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_lxor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_lxor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_lxor_uint8 +// A'*B function: GB_AdotB__max_lxor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
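+    // Outline of the outer-product pass below: for each column j, every
+    // entry B(k,j) selects column A(:,k); the products t = (aik != 0) !=
+    // (bkj != 0) are scattered into w [i] with the IMAX monoid, and C(:,j)
+    // is then gathered from w using either the Mask pattern or the pattern
+    // computed by GB_AxB_symbolic.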
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_fp32.c new file mode 100644 index 0000000000..f31450e1ae --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_max_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_max_fp32 +// A'*B function: GB_AdotB__max_max_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
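+    // Sketch of this semiring on illustrative (assumed) values: aik = 1.5,
+    // bkj = -2.0 gives t = FMAX (1.5,-2.0) = 1.5, and the additive monoid
+    // updates cij = FMAX (cij,t); the identity -INFINITY never changes cij.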
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_fp64.c new file mode 100644 index 0000000000..711b31ffa2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_max_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_max_fp64 +// A'*B function: GB_AdotB__max_max_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
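+    // Flag encoding used in the masked pass below: Flag [i] > 0 means
+    // Mask (i,j) is true but C(i,j) has not yet been computed; Flag [i] = -1
+    // means w [i] already holds a value for C(i,j); Flag [i] = 0 means
+    // Mask (i,j) is false, so the entry is skipped.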
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_int16.c new file mode 100644 index 0000000000..51659fa8ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_max_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_max_int16 +// A'*B function: GB_AdotB__max_max_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
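+    // Sketch of this semiring on illustrative (assumed) values: aik = -3,
+    // bkj = 7 gives t = IMAX (-3,7) = 7, and cij = IMAX (cij,t); the
+    // identity INT16_MIN never changes cij.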
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_int32.c new file mode 100644 index 0000000000..716a356d72 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_max_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_max_int32 +// A'*B function: GB_AdotB__max_max_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
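The unmasked branch further down ("C = A*B with pattern of C computed by GB_AxB_symbolic") is a Gustavson-style sparse multiply: for each column j the dense workspace w is cleared to the identity on the pattern of C(:,j), every column A(:,k) selected by an entry B(k,j) is folded into w, and the finished values are gathered back into Cx. A condensed sketch of that per-column step, using plain CSC arrays and an illustrative imax32 helper rather than the library's internal workspace:

#include <stdint.h>
#include <stdio.h>

static int32_t imax32 (int32_t x, int32_t y) { return (x > y) ? x : y ; }

// Sketch: compute one column C(:,j) = A*B(:,j) over the MAX_MAX_INT32
// semiring, assuming the pattern Cp/Ci of C is already known (as a symbolic
// pass would provide) and w is a dense scratch vector of length nrows.
// All array names here are illustrative.
static void max_max_column
(
    int64_t j,
    const int64_t *Ap, const int64_t *Ai, const int32_t *Ax,   // A in CSC form
    const int64_t *Bp, const int64_t *Bi, const int32_t *Bx,   // B in CSC form
    const int64_t *Cp, const int64_t *Ci, int32_t *Cx,         // pattern of C, values out
    int32_t *w                                                 // dense workspace
)
{
    // clear w to the additive identity, but only on the pattern of C(:,j)
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = INT32_MIN ;
    // scatter/accumulate: for each B(k,j), fold in column A(:,k)
    for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
    {
        int64_t k = Bi [p] ;
        int32_t bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;
            int32_t t = imax32 (Ax [pa], bkj) ;    // "multiply"
            w [i] = imax32 (w [i], t) ;            // "add" into the workspace
        }
    }
    // gather the finished column back into Cx
    for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;
}

int main (void)
{
    // 3-by-3 A and 3-by-1 B in CSC form (made-up values)
    int64_t Ap [4] = { 0, 2, 3, 4 } ;
    int64_t Ai [4] = { 0, 2, 1, 0 } ;
    int32_t Ax [4] = { 1, 4, 2, 5 } ;
    int64_t Bp [2] = { 0, 2 } ;
    int64_t Bi [2] = { 0, 2 } ;
    int32_t Bx [2] = { 3, 6 } ;
    // pattern of C(:,0), as a symbolic pass would have produced it
    int64_t Cp [2] = { 0, 2 } ;
    int64_t Ci [2] = { 0, 2 } ;
    int32_t Cx [2] ;
    int32_t w [3] ;
    max_max_column (0, Ap, Ai, Ax, Bp, Bi, Bx, Cp, Ci, Cx, w) ;
    printf ("C(0,0) = %d, C(2,0) = %d\n", (int) Cx [0], (int) Cx [1]) ;  // 6 and 4
    return (0) ;
}

Clearing w only on the pattern of C(:,j), rather than over all nrows positions, is what keeps the cost proportional to the entries actually touched.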
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_int64.c new file mode 100644 index 0000000000..097ad8dad9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_max_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_max_int64 +// A'*B function: GB_AdotB__max_max_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
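When a Mask is supplied and the WITH_ZOMBIES variant is compiled, the kernel below adopts the mask's pattern as the pattern of C outright: mask entries that A*B actually produces become live entries, while the remaining mask entries are kept as "zombies" whose row index is flipped to a negative encoding and counted in C->nzombies, and the matrix is then queued (GB_queue_insert) so the zombies can be cleaned up later. A small illustration of that live-versus-zombie bookkeeping; the flip function here is a hypothetical stand-in, not the library's FLIP macro:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

// hypothetical stand-in for the FLIP encoding: any reversible mapping that
// sends a valid row index to a negative value works for this illustration
static int64_t flip (int64_t i) { return (-i - 2) ; }

int main (void)
{
    // Mask(:,j) has entries in rows 1, 4, 7; suppose A*B(:,j) only produced
    // rows 1 and 7.  Row 4 stays in the pattern as a "zombie".
    int64_t maski [3] = { 1, 4, 7 } ;
    bool    produced [8] = { false } ;
    produced [1] = true ;
    produced [7] = true ;

    int64_t ci [3] ;
    int64_t nzombies = 0 ;
    for (int p = 0 ; p < 3 ; p++)
    {
        int64_t i = maski [p] ;
        ci [p] = produced [i] ? i : flip (i) ;   // zombie: flipped row index
        if (!produced [i]) nzombies++ ;
    }
    printf ("nzombies = %ld, Ci = [%ld %ld %ld]\n",
        (long) nzombies, (long) ci [0], (long) ci [1], (long) ci [2]) ;
    return (0) ;
}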
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_int8.c new file mode 100644 index 0000000000..56b01e1764 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_max_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_max_int8 +// A'*B function: GB_AdotB__max_max_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
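The dot-product kernel GB_AdotB__max_max_int8 further below picks a strategy per entry C(i,j) based on how the two column patterns compare: fully dense columns are scanned directly, a dense column is indexed by the sparse one, a 32x imbalance switches to a trimmed binary search (GB_BINARY_TRIM_SEARCH) over the larger pattern, and otherwise the two sorted index lists are merged in lock step. A self-contained sketch of that last, balanced case under the max-max semiring, with made-up data and an illustrative imax8 helper:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static int8_t imax8 (int8_t x, int8_t y) { return (x > y) ? x : y ; }

// Sketch of the "about the same sparsity" case of cij = A(:,i)'*B(:,j):
// walk the two sorted index lists in lock step and fold in a term only
// where the patterns intersect.  Arrays and values are illustrative.
int main (void)
{
    int64_t ai [4] = { 0, 2, 5, 9 } ;  int8_t ax [4] = { 4, -3,  7, 1 } ;
    int64_t bi [3] = { 2, 5, 8 } ;     int8_t bx [3] = { 6,  2, 50 } ;

    bool cij_exists = false ;
    int8_t cij = 0 ;
    int pa = 0, pb = 0 ;
    while (pa < 4 && pb < 3)
    {
        if      (ai [pa] < bi [pb]) pa++ ;          // A entry has no match
        else if (bi [pb] < ai [pa]) pb++ ;          // B entry has no match
        else                                        // rows match: k = ai [pa]
        {
            int8_t t = imax8 (ax [pa], bx [pb]) ;   // "multiply"
            cij = cij_exists ? imax8 (cij, t) : t ; // "add", or the first term
            cij_exists = true ;
            pa++ ; pb++ ;
        }
    }
    if (cij_exists) printf ("cij = %d\n", (int) cij) ;  // prints 7
    else            printf ("C(i,j) not present\n") ;
    return (0) ;
}

The 32x threshold is the heuristic the generated code uses to decide when skipping ahead by binary search is cheaper than stepping one entry at a time.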
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint16.c new file mode 100644 index 0000000000..931b8f7317 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_max_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_max_uint16 +// A'*B function: GB_AdotB__max_max_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
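The masked branch below relies on a small three-state protocol in the Flag workspace: Mask(:,j) is scattered into Flag lazily, only once a column of A is actually reached (tracked by the marked flag); +1 means the row is in the mask but C(i,j) has not been produced yet, and -1 means w[i] already holds a partial result. A compact illustration of that protocol with hypothetical names and values (umax16 stands in for the IMAX macro):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint16_t umax16 (uint16_t x, uint16_t y) { return (x > y) ? x : y ; }

// Flag[i] == 0  : row i is not in Mask(:,j), so any product there is dropped
// Flag[i] == +1 : row i is in the mask but C(i,j) has not been seen yet
// Flag[i] == -1 : C(i,j) already holds a partial result in the workspace w
int main (void)
{
    enum { NROWS = 6 } ;
    int8_t   flag [NROWS] ;
    uint16_t w    [NROWS] ;
    memset (flag, 0, sizeof (flag)) ;

    int64_t maski [2] = { 1, 4 } ;                          // Mask(:,j) pattern
    for (int p = 0 ; p < 2 ; p++) flag [maski [p]] = 1 ;    // scatter the mask

    // three incoming products for rows 1 and 3; row 3 is masked out
    struct { int64_t i ; uint16_t t ; } prod [3] =
        { { 1, 7 }, { 3, 9 }, { 1, 2 } } ;
    for (int p = 0 ; p < 3 ; p++)
    {
        int64_t i = prod [p].i ;
        if (flag [i] == 0) continue ;          // not in the mask: skip
        if (flag [i] > 0) { flag [i] = -1 ; w [i] = prod [p].t ; }  // first hit
        else              { w [i] = umax16 (w [i], prod [p].t) ; }  // update
    }
    printf ("w[1] = %u (row 3 masked out, row 4 never produced)\n",
        (unsigned) w [1]) ;                    // prints w[1] = 7
    return (0) ;
}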
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] 
= cij ;
+                Ci [cnz++] = i ;
+            }
+        }
+    }
+    // log the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint32.c
new file mode 100644
index 0000000000..a9475ce545
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_max_uint32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_max_uint32
+// A'*B function: GB_AdotB__max_max_uint32
+// Z type :  uint32_t (the type of C)
+// XY type:  uint32_t (the type of A and B)
+// Identity: 0 (where cij = IMAX (cij,0) does not change cij)
+// Multiply: t = (IMAX(aik,bkj))
+// Add:      cij = IMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_max_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] 
= cij ;
+                Ci [cnz++] = i ;
+            }
+        }
+    }
+    // log the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint64.c
new file mode 100644
index 0000000000..d06606c78b
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_max_uint64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_max_uint64
+// A'*B function: GB_AdotB__max_max_uint64
+// Z type :  uint64_t (the type of C)
+// XY type:  uint64_t (the type of A and B)
+// Identity: 0 (where cij = IMAX (cij,0) does not change cij)
+// Multiply: t = (IMAX(aik,bkj))
+// Add:      cij = IMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_max_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] 
= cij ;
+                Ci [cnz++] = i ;
+            }
+        }
+    }
+    // log the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_max_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint8.c
new file mode 100644
index 0000000000..c25e8cfb22
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_max_uint8.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_max_uint8: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_max_uint8
+// A'*B function: GB_AdotB__max_max_uint8
+// Z type :  uint8_t (the type of C)
+// XY type:  uint8_t (the type of A and B)
+// Identity: 0 (where cij = IMAX (cij,0) does not change cij)
+// Multiply: t = (IMAX(aik,bkj))
+// Add:      cij = IMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_max_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ;
+            }
+        }
+    }
+    // log the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_fp32.c
new file mode 100644
index 0000000000..3b50d9e859
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_fp32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_min_fp32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_min_fp32
+// A'*B function: GB_AdotB__max_min_fp32
+// Z type :  float (the type of C)
+// XY type:  float (the type of A and B)
+// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij)
+// Multiply: t = (FMIN(aik,bkj))
+// Add:      cij = FMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_min_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ;
+            }
+        }
+    }
+    // log the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_fp64.c
new file mode 100644
index 0000000000..91ac342ae1
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_fp64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_min_fp64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_min_fp64
+// A'*B function: GB_AdotB__max_min_fp64
+// Z type :  double (the type of C)
+// XY type:  double (the type of A and B)
+// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij)
+// Multiply: t = (FMIN(aik,bkj))
+// Add:      cij = FMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_min_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                   // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_int16.c new file mode 100644 index 0000000000..ad2b70277b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_int16 +// A'*B function: GB_AdotB__max_min_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
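+    // A brief orientation for this generated kernel (the other GB_AxB__*
+    // files follow the same pattern): the MAX_MIN "maximin" semiring
+    // computes C(i,j) = max over k of min (A(i,k), B(k,j)), with INT16_MIN
+    // as the identity of the MAX monoid.  Two cases follow.  With a Mask,
+    // C(:,j) is computed only where Mask(:,j) is true; when WITH_ZOMBIES is
+    // defined, Mask entries not present in A*B are kept as zombies (FLIP'd
+    // row indices, counted in C->nzombies).  Without a Mask, the pattern of
+    // C has already been computed by GB_AxB_symbolic.  In both cases w (the
+    // thread-local Work buffer, presumably allocated by the caller) serves
+    // as a dense accumulator for one column C(:,j) at a time.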
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_int32.c new file mode 100644 index 0000000000..2e25474307 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_int32 +// A'*B function: GB_AdotB__max_min_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_int64.c new file mode 100644 index 0000000000..045b53db59 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_int64 +// A'*B function: GB_AdotB__max_min_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_int8.c new file mode 100644 index 0000000000..b1492abcf0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_int8 +// A'*B function: GB_AdotB__max_min_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
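+    // Note on the Flag workspace (GB_thread_local.Flag) used in the masked
+    // case below: scatter_mask presumably marks Flag [i] > 0 wherever
+    // Mask(i,j) is true; the first time C(i,j) is computed, Flag [i] is set
+    // to -1 and the running value is kept in w [i]; Flag entries are reset
+    // to zero as C(:,j) is gathered, so the workspace can be reused for the
+    // next column.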
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint16.c new file mode 100644 index 0000000000..1f50b5d56a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_uint16 +// A'*B function: GB_AdotB__max_min_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] 
= cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint32.c new file mode 100644 index 0000000000..d32fd3d7de --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_uint32 +// A'*B function: GB_AdotB__max_min_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
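+    // Note on the masked case below: when a Mask is present, Mask(:,j) is
+    // scattered into the int8_t Flag workspace by scatter_mask.  Roughly,
+    // Flag [i] == 0 means C(i,j) is not permitted by the Mask and is skipped;
+    // Flag [i] > 0 means the Mask allows C(i,j) but no value has been
+    // computed yet; once a value is placed in w [i], Flag [i] is set to -1 so
+    // that later products update w [i] with the MAX monoid instead of
+    // overwriting it.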
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] 
= cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint64.c new file mode 100644 index 0000000000..5f3c5771ca --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_uint64 +// A'*B function: GB_AdotB__max_min_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
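+    // Note on zombies: when compiled with WITH_ZOMBIES, C takes the pattern
+    // of the Mask (Maskp is copied into C->p).  Entries of the Mask that do
+    // not appear in A*B are kept as zombies: their row index is stored as
+    // FLIP (i), C->nzombies is incremented, and the matrix is placed on the
+    // queue via GB_queue_insert (see the #ifdef WITH_ZOMBIES block below) so
+    // the zombies can be removed later.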
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] 
= cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_min_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint8.c new file mode 100644 index 0000000000..8771303529 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_min_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_min_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_min_uint8 +// A'*B function: GB_AdotB__max_min_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
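+    // Note on the unmasked case below: when Mask is NULL, the pattern of C
+    // has already been computed by GB_AxB_symbolic, and Cp and Ci are read
+    // only here.  For each column j, the entries of w listed in the pattern
+    // of C(:,j) are first cleared to the identity value 0, the column is
+    // computed into w, and then w is gathered back into Cx using that same
+    // pattern.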
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_fp32.c new file mode 100644 index 0000000000..1b4339e146 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_fp32 +// A'*B function: GB_AdotB__max_minus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
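+    // Note on the flip argument: MINUS is not commutative, so when the caller
+    // has swapped A and B (flip == true) the multiply below is computed as
+    // (bkj - aik) instead of (aik - bkj), which restores the operands to
+    // their original order.  The additive MAX operator is applied the same
+    // way in either case, since it does not depend on operand order.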
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_fp64.c new file mode 100644 index 0000000000..6f6f4aed65 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_fp64: hard-coded C=A*B 
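
The generated max_minus kernels in this patch all instantiate the same pattern over different scalar types: the semiring "multiply" is a subtraction, t = aik-bkj (or bkj-aik when flip is true), and the "add" is the MAX monoid whose identity is -INFINITY for the floating-point kernels and the type's minimum for the integer kernels. A minimal standalone sketch of the dot-product MERGE step follows, assuming a hypothetical helper name max_minus_dot_fp64 and plain C arrays in place of the GrB_Matrix internals; it is an illustration of the technique, not code from the patch.

    #include <math.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    // Returns true and sets *cij if A(:,i)'*B(:,j) produces at least one term.
    // Ai/Ax and Bi/Bx are the sorted row indices and values of the two columns.
    static bool max_minus_dot_fp64
    (
        double *cij,
        const int64_t *Ai, const double *Ax, int64_t anz,   // A(:,i), sorted
        const int64_t *Bi, const double *Bx, int64_t bnz,   // B(:,j), sorted
        bool flip
    )
    {
        bool exists = false ;
        double c = -INFINITY ;              // identity of the MAX monoid
        int64_t pa = 0, pb = 0 ;
        while (pa < anz && pb < bnz)        // merge the two sorted index lists
        {
            if      (Ai [pa] < Bi [pb]) pa++ ;   // A entry has no match in B
            else if (Bi [pb] < Ai [pa]) pb++ ;   // B entry has no match in A
            else
            {
                // row indices match: apply the MINUS multiply, then the MAX add
                double t = flip ? (Bx [pb] - Ax [pa]) : (Ax [pa] - Bx [pb]) ;
                c = exists ? fmax (c, t) : t ;
                exists = true ;
                pa++ ; pb++ ;
            }
        }
        (*cij) = c ;
        return (exists) ;
    }

    int main (void)
    {
        // A(:,i) has entries at rows 0,2,5 ; B(:,j) at rows 2,3,5
        int64_t Ai [ ] = {0, 2, 5} ;  double Ax [ ] = {4.0, 1.0, 7.0} ;
        int64_t Bi [ ] = {2, 3, 5} ;  double Bx [ ] = {3.0, 8.0, 2.0} ;
        double cij ;
        if (max_minus_dot_fp64 (&cij, Ai, Ax, 3, Bi, Bx, 3, false))
        {
            printf ("cij = %g\n", cij) ;    // max (1-3, 7-2) = 5
        }
        return (0) ;
    }

The generated GB_AdotB functions wrap further branches around this merge (dense-times-dense, dense-times-sparse, and the GB_BINARY_TRIM_SEARCH cases used when one column has more than 32 times the entries of the other); the sketch keeps only the balanced two-pointer merge, which is the common core of all of them.
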
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_fp64 +// A'*B function: GB_AdotB__max_minus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) 
into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int16.c new file mode 100644 index 0000000000..0f25903c3e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_int16: hard-coded 
C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_int16 +// A'*B function: GB_AdotB__max_minus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // 
scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int32.c new file mode 100644 index 0000000000..0a9ec2091c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_int32: 
hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_int32 +// A'*B function: GB_AdotB__max_minus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue 
; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int64.c new file mode 100644 index 0000000000..4bcf24e7c1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_int64: 
hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_int64 +// A'*B function: GB_AdotB__max_minus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue 
; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int8.c new file mode 100644 index 0000000000..abf556d739 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_int8: hard-coded 
C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_int8 +// A'*B function: GB_AdotB__max_minus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint16.c new file mode 100644 index 0000000000..90670510c3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_uint16: hard-coded 
C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_uint16 +// A'*B function: GB_AdotB__max_minus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint32.c new file mode 100644 index 0000000000..f39065e7ec --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_uint32: hard-coded 
C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_uint32 +// A'*B function: GB_AdotB__max_minus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint64.c new file mode 100644 index 0000000000..9f73a1f8b4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_uint64: hard-coded 
C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_uint64 +// A'*B function: GB_AdotB__max_minus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint8.c new file mode 100644 index 0000000000..a47bd6c79b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_minus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_minus_uint8: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_minus_uint8 +// A'*B function: GB_AdotB__max_minus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into 
Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_fp32.c new file mode 100644 index 0000000000..f10dffff7a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_fp32: hard-coded C=A*B 
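//------------------------------------------------------------------------------
// Note on `flip` in the MAX_MINUS kernels above: the caller may have swapped A
// and B before dispatching, and MINUS is not commutative, so the MERGE macro
// and the inner loops apply the multiply as (bkj-aki) when flip is true and
// (aki-bkj) otherwise, restoring the original argument order.  The MAX_PLUS
// kernels that follow receive the same flag but never reference it, since PLUS
// is commutative.  A minimal standalone sketch of the convention over dense
// uint8_t columns (hypothetical helper, not part of the generated code):

#include <stdint.h>
#include <stdbool.h>

static inline uint8_t sketch_max_minus_dot_uint8
(
    const uint8_t *x,       // stands in for A(:,i), dense, length n
    const uint8_t *y,       // stands in for B(:,j), dense, length n
    int64_t n,
    bool flip               // true if the caller swapped the two inputs
)
{
    uint8_t cij = 0 ;       // identity of the MAX monoid over uint8_t
    for (int64_t k = 0 ; k < n ; k++)
    {
        // apply z = x-y in the caller's original argument order
        uint8_t t = flip ? (uint8_t) (y [k] - x [k]) : (uint8_t) (x [k] - y [k]) ;
        cij = (cij > t) ? cij : t ;     // cij = IMAX (cij, t)
    }
    return (cij) ;
}
//------------------------------------------------------------------------------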
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_fp32 +// A'*B function: GB_AdotB__max_plus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik + bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + 
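    // Maskp, Maski, and Maskx hold the optional Mask in the same
    // compressed-column form as A, B, and C (column pointers, row indices,
    // values); they remain NULL, and msize remains 0, when no Mask is passed.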
const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki + bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + 
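                    // Ai [pa] == ia is already known to be < ib, so the
                    // binary search can start at pa+1; pleft is then
                    // guaranteed to advance past pa (see the ASSERT below),
                    // and the merge always makes forward progress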
GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_fp64.c new file mode 100644 index 0000000000..8ef9233fc8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. 
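//------------------------------------------------------------------------------
// All of the MAX_PLUS kernels in this family (fp32 above, fp64 here, and the
// int16/int32/int64 variants below) use the same semiring: the multiply is
// addition and the add is MAX, with identity -INFINITY for floating point and
// INT*_MIN for the integer types, chosen so that cij = FMAX (cij, identity)
// leaves cij unchanged.  A standalone sketch of the resulting dense dot
// product (hypothetical helper, not part of the generated code; fmax from
// <math.h> stands in for the FMAX macro):

#include <math.h>
#include <stdint.h>

static inline double sketch_max_plus_dot_fp64
(
    const double *x,        // stands in for A(:,i), dense, length n
    const double *y,        // stands in for B(:,j), dense, length n
    int64_t n
)
{
    double cij = -INFINITY ;            // MAX identity: any real t replaces it
    for (int64_t k = 0 ; k < n ; k++)
    {
        double t = x [k] + y [k] ;      // Multiply: t = aki + bkj
        cij = fmax (cij, t) ;           // Add:      cij = FMAX (cij, t)
    }
    return (cij) ;
}
//------------------------------------------------------------------------------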
+ +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_fp64 +// A'*B function: GB_AdotB__max_plus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + 
// C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik + bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx 
[pb++] ; /* bjk = B(k,j) */ \ + double t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki + bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + 
//-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int16.c new file mode 100644 index 0000000000..523e0e6762 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_int16 +// A'*B function: GB_AdotB__max_plus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
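        //------------------------------------------------------------------
        // The unmasked branch of each of these kernels uses w as a classic
        // Gustavson-style workspace: for each column j, clear w at the
        // positions of the precomputed pattern of C(:,j), accumulate
        // w [i] = max (w [i], A(i,k)+B(k,j)) over the entries of B(:,j), and
        // gather w back into Cx.  A sketch of that scatter/gather pattern for
        // one column, int16 max-plus (hypothetical helper with assumed
        // argument names, needs <stdint.h>; in the kernel the same loops
        // appear inline below):
        //
        //    static void sketch_max_plus_column_int16
        //    (
        //        int16_t *w,                         // size nrows, any content
        //        int16_t *Cx, const int64_t *Ci,     // pattern/values of C
        //        int64_t pc_start, int64_t pc_end,   // C(:,j) is [pc_start..pc_end-1]
        //        const int64_t *Ap, const int64_t *Ai, const int16_t *Ax,
        //        const int64_t *Bi, const int16_t *Bx,
        //        int64_t pb_start, int64_t pb_end    // B(:,j) is [pb_start..pb_end-1]
        //    )
        //    {
        //        // clear w only at positions that can appear in C(:,j)
        //        for (int64_t p = pc_start ; p < pc_end ; p++) w [Ci [p]] = INT16_MIN ;
        //        // scatter/accumulate the terms of A*B for this column
        //        for (int64_t pb = pb_start ; pb < pb_end ; pb++)
        //        {
        //            int64_t k = Bi [pb] ;
        //            int16_t bkj = Bx [pb] ;
        //            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        //            {
        //                int64_t i = Ai [pa] ;
        //                int16_t t = (int16_t) (Ax [pa] + bkj) ;
        //                if (t > w [i]) w [i] = t ;  // w [i] = IMAX (w [i], t)
        //            }
        //        }
        //        // gather the workspace back into the column of C
        //        for (int64_t p = pc_start ; p < pc_end ; p++) Cx [p] = w [Ci [p]] ;
        //    }
        //------------------------------------------------------------------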
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int32.c new file mode 100644 index 0000000000..0c08c1d96c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_int32 +// A'*B function: GB_AdotB__max_plus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
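        //------------------------------------------------------------------
        // When WITH_ZOMBIES is enabled, the masked branch below copies Maskp
        // into C->p, so C takes the Mask's pattern verbatim; any position that
        // is in the Mask but receives no A*B term becomes a zombie: its row
        // index is stored as FLIP (i), C->nzombies is incremented, and
        // GB_queue_insert (C) queues the matrix so the pending deletions can
        // be applied later.  A possible flipping scheme with the properties
        // the code relies on (an involution that makes zombie indices
        // negative) is sketched here; the library's actual FLIP macro is
        // defined in its own headers and may differ:
        //
        //    #define SKETCH_FLIP(i)       (-(i)-2)   // hypothetical
        //    #define SKETCH_IS_ZOMBIE(i)  ((i) < 0)
        //    // SKETCH_FLIP (SKETCH_FLIP (i)) == i, so a later pass can
        //    // recover each zombie's original row index and remove it
        //------------------------------------------------------------------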
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int64.c new file mode 100644 index 0000000000..e0cf34212e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_int64 +// A'*B function: GB_AdotB__max_plus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
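        //------------------------------------------------------------------
        // For reference, the Flag [i] workspace used by the masked branch
        // below holds one of three states per row i, reset for each column j:
        //     0   Mask(i,j) is zero (or Mask(:,j) not yet scattered): skip row
        //     1   Mask(i,j) is one, but no A(i,k)*B(k,j) term seen yet
        //    -1   Mask(i,j) is one and w [i] holds the current value of C(i,j)
        // scatter_mask sets the 1's at most once per column (tracked by
        // `marked`), and the gather pass resets every touched Flag [i] to 0 so
        // the workspace can be reused for the next column.
        //------------------------------------------------------------------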
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int8.c new file mode 100644 index 0000000000..9f22af0196 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_int8 +// A'*B function: GB_AdotB__max_plus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
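// An illustrative sketch only (not part of the generated template): with the
// max-plus semiring each entry of C is accumulated as
//      t   = aik + bkj ;          // the semiring "multiply"
//      cij = IMAX (cij, t) ;      // the semiring "add" (monoid)
// For example, with A(i,k) = 3, B(k,j) = 5, and a current cij = 6, the update
// computes t = 8 and then cij = IMAX (6,8) = 8.  Starting cij at the identity
// INT8_MIN guarantees that the first product encountered always replaces it.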
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint16.c new file mode 100644 index 0000000000..ff84be2e2f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_uint16 +// A'*B function: GB_AdotB__max_plus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
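// Illustrative comment, not generated by the axb*.m scripts: the unmasked
// branch of this function follows Gustavson's method, one column of C at a
// time, using the pattern Cp,Ci already computed by GB_AxB_symbolic:
//      clear   : w [Ci [p]] = 0 (the identity) for each entry of C(:,j)
//      scatter : for each B(k,j), for each A(i,k) in A(:,k):
//                    w [i] = IMAX (w [i], A(i,k) + B(k,j))
//      gather  : Cx [p] = w [Ci [p]] for each entry of C(:,j)
// For example, if B(2,j) = 4 and A(:,2) has A(0,2) = 1 and A(3,2) = 7, the
// scatter step sets w [0] = IMAX (0, 1+4) = 5 and w [3] = IMAX (0, 7+4) = 11.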
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint32.c new file mode 100644 index 0000000000..4eb9c02581 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_uint32 +// A'*B function: GB_AdotB__max_plus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
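// Usage sketch (editorial illustration, not part of the generated code),
// showing how this kernel is typically reached from the user-level API.
// Assuming the built-in max-plus semiring over uint32 is the predefined
// object GxB_MAX_PLUS_UINT32 (an assumed name, not verified here), a call of
// the form
//      GrB_mxm (C, NULL, NULL, GxB_MAX_PLUS_UINT32, A, B, NULL) ;
// with no Mask and no accumulator is dispatched to GB_AxB__max_plus_uint32,
// while supplying a Mask routes the work through the masked branch of this
// function, or to GB_AdotB__max_plus_uint32 when the dot-product method is
// selected.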
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint64.c new file mode 100644 index 0000000000..659622e544 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_uint64 +// A'*B function: GB_AdotB__max_plus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
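// Illustrative note, not part of the generated template: the dot-product
// variant GB_AdotB__max_plus_uint64 in this file computes cij = A(:,i)'*B(:,j)
// by merging the two sorted index lists Ai [pa .. pa_end-1] and
// Bi [pb .. pb_end-1]; only matching row indices contribute.  For
// Ai = {0,2,5} and Bi = {2,5,7} the result is
//      cij = IMAX (A(2,i) + B(2,j), A(5,i) + B(5,j)) .
// When one list is more than 32 times longer than the other, the merge uses
// GB_BINARY_TRIM_SEARCH to jump ahead in the long list instead of advancing
// one entry at a time.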
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, 
+ Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci 
[cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint8.c new file mode 100644 index 0000000000..0578c87003 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_plus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_plus_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_plus_uint8 +// A'*B function: GB_AdotB__max_plus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
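    // The outer-product pass below uses w as a dense accumulator, one column
    // of C at a time: for each entry B(k,j) it scatters A(:,k) into w with
    // the max-plus semiring (t = aik + bkj ; w [i] = IMAX (w [i], t)), then
    // gathers w back into C(:,j).  For example, if A(4,1)=3 and B(1,2)=4
    // while A(4,7)=1 and B(7,2)=5, then w [4] becomes IMAX (3+4, 1+5) = 7,
    // which is gathered as C(4,2).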
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_fp32.c new file mode 100644 index 0000000000..bb7f2b747c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_fp32 +// A'*B function: GB_AdotB__max_second_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
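    // Note that with the SECOND multiply the "product" is simply bkj (aik is
    // loaded but does not affect the value), so for each row i the
    // accumulator keeps the largest B(k,j) over all k for which both A(i,k)
    // and B(k,j) are present; -INFINITY is the FMAX identity used to clear w.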
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_fp64.c new file mode 100644 index 0000000000..1a79e436e0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_fp64 +// A'*B function: GB_AdotB__max_second_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij) +// Multiply: t = (bkj) +// Add: cij = FMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of 
the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_int16.c new file mode 100644 index 0000000000..0124d718b0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_int16 +// A'*B function: GB_AdotB__max_second_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_int32.c new file mode 100644 index 0000000000..e9361699c8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_int32 +// A'*B function: GB_AdotB__max_second_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
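// Worked example of the step above (illustrative values, not from any real
// matrix): with the MAX_SECOND semiring the numerical values of A never
// matter, only the pattern of A(:,i).  Suppose
//
//      A(:,i) has entries in rows {1, 2}
//      B(:,j) has entries (0, 3) and (2, 7)
//
// The two patterns intersect only at row 2, so the merge performs a single
// MERGE step, cij_exists becomes true, cij = 7, and the entry is appended to
// C(:,j).  If the patterns were disjoint, nothing would be appended.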
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_int64.c new file mode 100644 index 0000000000..5acfc364c1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_int64 +// A'*B function: GB_AdotB__max_second_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
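// A minimal sketch of the structure below (illustrative pseudo-C, assuming a
// dense workspace w of length C->nrows): the unmasked phase follows the usual
// gather/scatter saxpy pattern, with the pattern of C precomputed by
// GB_AxB_symbolic:
//
//      for each column j:
//          for p in Cp[j]..Cp[j+1]-1: w [Ci [p]] = identity       // clear
//          for each entry B(k,j):                                 // saxpy
//              for each entry A(i,k): w [i] = IMAX (w [i], bkj)   // MAX_SECOND
//          for p in Cp[j]..Cp[j+1]-1: Cx [p] = w [Ci [p]]         // gather
//
// The masked phase instead takes the pattern of C from the Mask.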
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
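// Note on the merge strategies above, with illustrative sizes: when one
// column is far denser than the other (the 32x ratio tests), the loop walks
// the sparser column and uses GB_BINARY_TRIM_SEARCH to skip over runs of the
// denser one, giving roughly O(bjnz log ainz) work instead of O(ainz + bjnz).
// For example, with ainz = 10000 and bjnz = 4, four binary searches of about
// 14 probes each replace a linear scan of 10000 entries.  When the two
// columns are comparable in size, the plain two-pointer merge is used.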
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_int8.c new file mode 100644 index 0000000000..e242a50091 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_int8 +// A'*B function: GB_AdotB__max_second_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
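// How the masked phase below builds C when WITH_ZOMBIES is defined: C->p is
// copied from Maskp, so C starts with exactly the pattern of the Mask.  Mask
// positions that receive no contribution from A*B become zombies: their row
// index is stored flipped (Ci [p] = FLIP (i)) and C->nzombies is incremented,
// so they can be pruned later when the pending work on C is resolved.
// Without WITH_ZOMBIES, only live entries are appended and Cp/cnz track the
// compacted pattern directly.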
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint16.c new file mode 100644 index 0000000000..7a107595f9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_uint16 +// A'*B function: GB_AdotB__max_second_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
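// Why the identity is 0 in this file: for the unsigned kernels the additive
// identity of MAX is 0 rather than a signed minimum, since IMAX (cij, 0)
// leaves any uint16_t value unchanged (for example IMAX (5, 0) == 5).  That
// is why the zombie values and the cleared workspace entries below are
// written as 0, unlike the int*_t kernels above.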
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint32.c new file mode 100644 index 0000000000..08cbc0d05d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_uint32 +// A'*B function: GB_AdotB__max_second_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
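// Note on the aki loads in this file: since the multiplier is SECOND
// (t = bkj), the assignments aki = Ax [...] are never read afterwards; they
// appear to be kept by the code generator for uniformity across semirings,
// and an optimizing compiler can simply discard them.  Only the pattern of A
// participates in the result.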
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint64.c new file mode 100644 index 0000000000..b2f0a0c068 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_second_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_second_uint64 +// A'*B function: GB_AdotB__max_second_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
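// Workspace contract assumed below: w points at GB_thread_local.Work, an
// uninitialized scratch array with at least C->nrows entries, and Flag is a
// cleared int8_t array of the same length.  A given w [i] is only read after
// it has been written in the same column (via the Flag marking in the masked
// phase, or the explicit clear in the unmasked phase), so the uninitialized
// contents are never observed, and Flag is reset to zero before the next
// column is processed.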
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
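// Why C is queued below rather than compacted here: any zombies created above
// are left in place, and GB_queue_insert defers their removal until GraphBLAS
// resolves the pending work on C, so this kernel never has to shuffle Ci and
// Cx itself.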
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_second_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint8.c
new file mode 100644
index 0000000000..49e521c6c7
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_second_uint8.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_second_uint8: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_second_uint8
+// A'*B function: GB_AdotB__max_second_uint8
+// Z type : uint8_t (the type of C)
+// XY type: uint8_t (the type of A and B)
+// Identity: 0 (where cij = IMAX (cij,0) does not change cij)
+// Multiply: t = (bkj)
+// Add:      cij = IMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_second_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                    // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
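+    // Sketch of the method, as implemented below: w is a dense thread-local
+    // workspace with one slot per row of C.  For each column j, each entry
+    // B(k,j) selects a column A(:,k); the multiplier is SECOND, so the
+    // product is just t = bkj (aik is read but does not enter t), and w [i]
+    // accumulates t with IMAX.  When a Mask is given, Mask(:,j) is scattered
+    // into Flag first so only rows allowed by the Mask are touched.  For
+    // example, if B(2,j) = 5 and B(7,j) = 9, and row i occurs in both A(:,2)
+    // and A(:,7), then C(i,j) = IMAX (5,9) = 9.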
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] 
= cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_fp32.c
new file mode 100644
index 0000000000..b641baebd1
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_fp32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_times_fp32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_times_fp32
+// A'*B function: GB_AdotB__max_times_fp32
+// Z type : float (the type of C)
+// XY type: float (the type of A and B)
+// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij)
+// Multiply: t = (aik * bkj)
+// Add:      cij = FMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_times_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                    // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
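+    // Here the multiplier is TIMES and the monoid is MAX over float: each
+    // product contributes via w [i] = FMAX (w [i], aik * bkj), with
+    // -INFINITY acting as the identity, since it never wins a FMAX.  For
+    // example, if A(i,1) = 2, A(i,4) = -3, B(1,j) = 3 and B(4,j) = 10, then
+    // C(i,j) = fmax (2*3, -3*10) = 6.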
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
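+        // Same gather and zombie bookkeeping as in the uint8 and uint64
+        // kernels earlier in this patch; the only difference is the identity
+        // written into zombie slots, which is -INFINITY here rather than 0
+        // or the type's minimum value.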
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik * bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki * bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_fp64.c
new file mode 100644
index 0000000000..3955947608
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_fp64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_times_fp64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_times_fp64
+// A'*B function: GB_AdotB__max_times_fp64
+// Z type : double (the type of C)
+// XY type: double (the type of A and B)
+// Identity: -INFINITY (where cij = FMAX (cij,-INFINITY) does not change cij)
+// Multiply: t = (aik * bkj)
+// Add:      cij = FMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_times_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                    // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
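+    // The companion GB_AdotB__max_times_fp64 further down computes the same
+    // semiring as C=A'*B using dot products: for each candidate C(i,j) it
+    // intersects the index lists of A(:,i) and B(:,j), with special cases
+    // for dense/dense, dense/sparse and sparse/dense columns, and it falls
+    // back to GB_BINARY_TRIM_SEARCH to skip ahead when one list has more
+    // than 32 times the entries of the other.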
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = -INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = -INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik * bkj ; + w [i] = FMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki * bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = -INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij = FMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ;
+            }
+        }
+    }
+    // log the end of the last column
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_int16.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_int16.c
new file mode 100644
index 0000000000..7a5e96f6b8
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_int16.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__max_times_int16: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com  See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited.  Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function:  GB_AxB__max_times_int16
+// A'*B function: GB_AdotB__max_times_int16
+// Z type : int16_t (the type of C)
+// XY type: int16_t (the type of A and B)
+// Identity: INT16_MIN (where cij = IMAX (cij,INT16_MIN) does not change cij)
+// Multiply: t = (aik * bkj)
+// Add:      cij = IMAX (cij,t)
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__max_times_int16
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip                    // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
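+    // For this int16_t kernel the additive identity is INT16_MIN, so an
+    // untouched w [i] can never beat a real product under IMAX.  Note that
+    // aik * bkj is evaluated after the usual integer promotions and then
+    // narrowed back to int16_t, so a large product can wrap.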
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_int32.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_int32.c new file mode 100644 index 0000000000..ac11ba2248 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_times_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_times_int32 +// A'*B function: GB_AdotB__max_times_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MIN (where cij = IMAX (cij,INT32_MIN) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
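+ // A minimal sketch (illustration only, with made-up scalar values) of how
+ // the MAX_TIMES_INT32 semiring combines entries: "multiply" is the integer
+ // product and "add" is IMAX, starting from the identity INT32_MIN:
+ //
+ //     int32_t cij = INT32_MIN ;          // additive identity of MAX
+ //     cij = IMAX (cij,  2 * 3) ;         // a pair A(k,i)=2,  B(k,j)=3  ->  6
+ //     cij = IMAX (cij, -4 * 5) ;         // a pair A(k,i)=-4, B(k,j)=5  ->  6
+ //     // cij == 6, since IMAX (6, -20) == 6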
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_int64.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_int64.c new file mode 100644 index 0000000000..d9e5f32856 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_times_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_times_int64 +// A'*B function: GB_AdotB__max_times_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MIN (where cij = IMAX (cij,INT64_MIN) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
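+ // The masked branch below follows this per-column outline (a sketch of the
+ // existing control flow, not additional code):
+ //
+ //     scatter Mask(:,j) into Flag, done lazily on first use
+ //         (Flag [i] = 1 where Mask(i,j) is true)
+ //     for each entry B(k,j):
+ //         for each entry A(i,k) with Flag [i] != 0:
+ //             t = A(i,k) * B(k,j)
+ //             first time:  Flag [i] = -1 ; w [i] = t
+ //             afterwards:  w [i] = IMAX (w [i], t)
+ //     gather C(:,j) from w over the pattern of Mask(:,j), resetting Flag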
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_int8.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_int8.c new file mode 100644 index 0000000000..8509f98983 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_times_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_times_int8 +// A'*B function: GB_AdotB__max_times_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MIN (where cij = IMAX (cij,INT8_MIN) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
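+ // The dot-product form GB_AdotB__max_times_int8 (further below) computes
+ // each C(i,j) = A(:,i)'*B(:,j) by one of five cases, chosen from the entry
+ // counts ainz = nnz (A(:,i)) and bjnz = nnz (B(:,j)):
+ //
+ //     both columns dense:   loop k = 0 to nrows-1 directly
+ //     one column dense:     loop over the sparse one, index into the dense one
+ //     ainz > 32 * bjnz:     merge, using GB_BINARY_TRIM_SEARCH to skip
+ //                           long runs of A(:,i)
+ //     bjnz > 32 * ainz:     the symmetric case, skipping runs of B(:,j)
+ //     otherwise:            plain two-pointer merge of the two patterns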
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MIN ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MIN ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MIN ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint16.c new file mode 100644 index 0000000000..8615151574 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_times_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_times_uint16 +// A'*B function: GB_AdotB__max_times_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
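+ // For the unsigned variants the additive identity of the MAX monoid is 0,
+ // not a *_MIN constant.  A minimal sketch with made-up values:
+ //
+ //     uint16_t cij = 0 ;                     // IMAX (cij,0) leaves cij unchanged
+ //     cij = IMAX (cij, (uint16_t) (7 * 6)) ; // -> 42
+ //     cij = IMAX (cij, (uint16_t) (2 * 3)) ; // still 42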
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint32.c new file mode 100644 index 0000000000..72bb99bff7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_times_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_times_uint32 +// A'*B function: GB_AdotB__max_times_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
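+ // When WITH_ZOMBIES is defined, the masked phase below keeps the entire
+ // Mask pattern in C: entries of the Mask that receive no contribution from
+ // A*B become zombies (Ci [p] = FLIP (i), C->nzombies incremented, value set
+ // to the identity 0), and C is then placed in the queue via GB_queue_insert
+ // so the zombies can be deleted later.  Sketch of the gather step:
+ //
+ //     if (Flag [i] < 0) { Cx [p] = w [i] ; Ci [p] = i ; }       // live entry
+ //     else { Cx [p] = 0 ; Ci [p] = FLIP (i) ; C->nzombies++ ; } // zombie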
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint64.c new file mode 100644 index 0000000000..2a82e6a128 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_times_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_times_uint64 +// A'*B function: GB_AdotB__max_times_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__max_times_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint8.c new file mode 100644 index 0000000000..afeb1e1770 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__max_times_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__max_times_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__max_times_uint8 +// A'*B function: GB_AdotB__max_times_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij = IMAX (cij,0) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMAX (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__max_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMAX (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + w [i] = IMAX (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__max_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMAX (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij = IMAX (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_fp32.c new file mode 100644 index 0000000000..a3d19df21a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_div_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_fp32 +// A'*B function: GB_AdotB__min_div_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_fp64.c new file mode 100644 index 0000000000..11dcb9a4f3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_div_fp64: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_fp64 +// A'*B function: GB_AdotB__min_div_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag 
if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_int16.c new file mode 100644 index 0000000000..189e09694e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_div_int16: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_int16 +// A'*B function: GB_AdotB__min_div_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_int32.c new file mode 100644 index 0000000000..3274c53f50 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_div_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_int32 +// A'*B function: GB_AdotB__min_div_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < 
mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_int64.c new file mode 100644 index 0000000000..8b23503e11 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_div_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_int64 +// A'*B function: GB_AdotB__min_div_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < 
mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_int8.c new file mode 100644 index 0000000000..dbd400cd5d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_div_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_int8 +// A'*B function: GB_AdotB__min_div_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > 
mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint16.c new file mode 100644 index 0000000000..a08b86bd12 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_div_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_uint16 +// A'*B function: GB_AdotB__min_div_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + 
if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint32.c new file mode 100644 index 0000000000..569f4f500a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_div_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_uint32 +// A'*B function: GB_AdotB__min_div_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + 
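                // (annotation, not part of the generated kernel) empty() above
                // also reports the smallest (alo) and largest (ahi) row index
                // of A(:,k); the next test skips the whole column when that
                // range cannot overlap the row range [mlo..mhi] of Mask(:,j),
                // since no product A(i,k)*B(k,j) could then land on an entry
                // permitted by the mask.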
if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint64.c new file mode 100644 index 0000000000..3ff04947de --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_div_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_uint64 +// A'*B function: GB_AdotB__min_div_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + 
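                // (annotation, not part of the generated kernel) Flag [] acts
                // as a 3-state per-row workspace for the current column j:
                //    0  : Mask(i,j) is absent or false, so C(i,j) is skipped
                //    >0 : set by scatter_mask below; the mask allows C(i,j),
                //         but no value has been accumulated yet
                //    -1 : C(i,j) has been seen at least once; w [i] holds the
                //         running IMIN reduction and is updated in place
                // The gather phase then keeps rows with Flag [i] < 0 as live
                // entries and, under WITH_ZOMBIES, turns the remaining mask
                // entries into zombies.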
if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_div_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint8.c new file mode 100644 index 0000000000..a68076998f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_div_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_div_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_div_uint8 +// A'*B function: GB_AdotB__min_div_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < 
mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_fp32.c new file mode 100644 index 0000000000..31d9f1fcde --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_first_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_fp32 +// A'*B function: GB_AdotB__min_first_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const 
int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + 
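                        // (annotation, not part of the generated kernel) this
                        // branch runs only when A(:,i) has more than 32x the
                        // entries of B(:,j); GB_BINARY_TRIM_SEARCH below
                        // narrows [pleft..pright] by binary search so that all
                        // entries of A(:,i) with row index below ib are
                        // discarded in one step, instead of advancing pa one
                        // entry per iteration as the balanced merge does.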
GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_fp64.c new file mode 100644 index 0000000000..1c0d5e451e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. 
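// (annotation, a hedged sketch; not part of the generated file) The MIN_FIRST
// semiring defined below multiplies with FIRST (t = aik; bkj is never read)
// and adds with FMIN, whose identity is INFINITY, so each dot product reduces
// to the minimum value of A(:,i) over the intersection pattern with B(:,j).
// A minimal standalone analogue over dense double arrays, using a hypothetical
// helper dot_min_first that is not a GraphBLAS API:

#include <math.h>
#include <stdint.h>

static double dot_min_first (const double *a, const double *b, int64_t n)
{
    (void) b ;                      // FIRST ignores its second argument
    double cij = INFINITY ;         // identity of the MIN monoid
    for (int64_t k = 0 ; k < n ; k++)
    {
        double t = a [k] ;          // FIRST(a[k], b[k]) is just a[k]
        cij = fmin (cij, t) ;       // MIN monoid: cij = min (cij, t)
    }
    return cij ;
}

// In the sparse kernels below the same pattern appears with FMIN in place of
// fmin, and with the loop restricted to rows where both A(:,i) and B(:,j)
// have entries.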
+ +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_fp64 +// A'*B function: GB_AdotB__min_first_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) 
seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = 
B(k,j) */ \ + double t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < 
pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_int16.c new file mode 100644 index 0000000000..2e56375843 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_int16 +// A'*B function: GB_AdotB__min_first_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
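+    // Note: an explanatory sketch of this semiring, not part of the
+    // generated template.  The "multiply" FIRST(aik,bkj) simply returns
+    // aik, and the "add" is the MIN monoid whose identity is INT16_MAX.
+    // For example, if A(:,k) holds the values {3,7} in rows {0,2} and
+    // B(k,j) is present with any value, the contributions to C(:,j) are
+    // w [0] = IMIN (w [0],3) and w [2] = IMIN (w [2],7); the value of
+    // B(k,j) is ignored, and its presence only selects column A(:,k).
+    // This kernel is normally reached through GrB_mxm with the matching
+    // built-in semiring (assumed here to be GxB_MIN_FIRST_INT16), as in:
+    //
+    //      GrB_mxm (C, Mask, NULL, GxB_MIN_FIRST_INT16, A, B, NULL) ;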
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
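+            // Note: an informal summary, not part of the generated
+            // template.  cij is appended only when the patterns of A(:,i)
+            // and B(:,j) intersect, so Ci and Cx are filled in order and
+            // each column of C stays sorted.  The "32 *" tests above are a
+            // heuristic: when one pattern is far denser than the other,
+            // GB_BINARY_TRIM_SEARCH skips over runs of unmatched indices
+            // in the denser list instead of advancing one entry at a time.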
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_int32.c new file mode 100644 index 0000000000..ac9a5cd7f6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_int32 +// A'*B function: GB_AdotB__min_first_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
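+    // Note: an informal sketch of the masked method, not part of the
+    // generated template.  w is a dense workspace of length A->nrows, and
+    // Flag records which rows of Mask(:,j) are present (scatter_mask is
+    // assumed to set Flag [i] nonzero for entries that cast to true, which
+    // is why flag == 0 is skipped below).  The first accepted update for a
+    // row flips Flag [i] to -1 and writes w [i]; later updates fold into
+    // w [i] with IMIN.  The gather phase walks Mask(:,j) once, copies live
+    // entries out of w, and resets Flag to zero, so both workspaces are
+    // clean for the next column without a full clear.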
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_int64.c new file mode 100644 index 0000000000..a9ac13a469 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_int64 +// A'*B function: GB_AdotB__min_first_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_int8.c new file mode 100644 index 0000000000..bac2d6fb3e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_int8 +// A'*B function: GB_AdotB__min_first_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
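+        // Note: an informal sketch, not part of the generated template.
+        // Entries of the Mask that receive no contribution from A*B are
+        // kept as zombies: their row index is stored as FLIP (i), a
+        // negative encoding, and C->nzombies is incremented, so they can
+        // be pruned later without reallocating C here.  A matrix holding
+        // zombies has pending work, which appears to be why GB_queue_insert
+        // places C on the global queue below.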
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + 
&i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint16.c new file mode 100644 index 0000000000..59f18ed089 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_uint16 +// A'*B function: GB_AdotB__min_first_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
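+    // In this kernel, w is the dense accumulator for the saxpy-style
+    // (Gustavson) method: each contribution A(i,k)*B(k,j) is scattered
+    // into w, and C(:,j) is gathered from w at the end of the column.
+    // For this semiring the multiply is FIRST, so t = aik and the value
+    // of B(k,j) is not used, and the add is IMIN with identity UINT16_MAX.
+    //
+    // A small worked example (illustrative only): if A(i,k1) = 7 and
+    // A(i,k2) = 3, and B(k1,j) and B(k2,j) are both present, then
+    //
+    //      cij = IMIN (IMIN (UINT16_MAX, 7), 3) = 3 ,
+    //
+    // so C(i,j) is the smallest A(i,k) over all k for which both A(i,k)
+    // and B(k,j) appear.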
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
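+        // When compiled WITH_ZOMBIES, the pattern of C is a copy of the
+        // Mask: any Mask entry that received no contribution from A*B is
+        // kept as a zombie (its row index stored as FLIP (i), and counted
+        // in C->nzombies) rather than being deleted inside this kernel.
+        // C is then placed in the queue below so the zombies can be
+        // removed later, outside this kernel.  Without zombies, the
+        // pattern of C is built incrementally in Cp and Ci via cnz, and
+        // only live entries are kept.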
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = 
cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint32.c new file mode 100644 index 0000000000..5f52562a53 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_uint32 +// A'*B function: GB_AdotB__min_first_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
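+    // When no Mask is given (the else branch below), the pattern of C has
+    // already been computed by GB_AxB_symbolic.  In that case w only needs
+    // to be cleared at the positions in the pattern of C(:,j): each such
+    // w [i] starts at the identity UINT32_MAX, is accumulated with IMIN,
+    // and is then gathered back into Cx.  Positions of w outside the
+    // pattern of C(:,j) are never touched for that column.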
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
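+        // Summary of the Flag workspace protocol used above, per column j:
+        //
+        //      Flag [i] == 0    Mask(i,j) not present, or present but false
+        //      Flag [i] >  0    Mask(i,j) is true but C(i,j) not yet seen
+        //      Flag [i] == -1   C(i,j) has been seen; w [i] holds its value
+        //
+        // scatter_mask sets the positive flags, the numeric loop flips a
+        // flag to -1 on the first contribution to C(i,j), and the gather
+        // loop resets every flag to zero so the workspace is clean for the
+        // next column.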
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = 
cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint64.c new file mode 100644 index 0000000000..53f6915eeb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_uint64 +// A'*B function: GB_AdotB__min_first_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
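+    // Both workspaces used by this kernel are owned by the caller: w comes
+    // from GB_thread_local.Work (uninitialized on entry, as noted above),
+    // and Flag comes from GB_thread_local.Flag (already allocated and
+    // cleared).  Nothing is allocated or freed in this function, and the
+    // gather loops return Flag to its all-zero state so it can be reused
+    // by the next call.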
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
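+        // Two inexpensive pruning steps keep the masked loop above cheap.
+        // empty (Maskp, Maski, j, &mlo, &mhi) skips column j entirely when
+        // Mask(:,j) has no entries, and otherwise reports its first and
+        // last row indices [mlo,mhi]; any A(:,k) whose row range [alo,ahi]
+        // does not overlap [mlo,mhi] cannot contribute and is skipped.
+        // Mask(:,j) is only scattered into Flag (setting marked) the first
+        // time a compatible A(:,k) is found, so a column that never finds
+        // one skips the scatter, and without zombies the gather as well.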
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = 
cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_first_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint8.c new file mode 100644 index 0000000000..60b3923d1b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_first_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_first_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_first_uint8 +// A'*B function: GB_AdotB__min_first_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
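+            // Each C(i,j) above is a single dot product A(:,i)'*B(:,j).
+            // jinit selects the columns j with work to do, and cij_init
+            // locates A(:,i) and checks the Mask entry before any numeric
+            // work is done.  The dot product itself is computed by one of
+            // five cases chosen from the entry counts ainz and bjnz: both
+            // columns dense, one dense and one sparse (indexed directly),
+            // one column more than 32 times sparser than the other (the
+            // denser column is advanced with GB_BINARY_TRIM_SEARCH instead
+            // of one entry at a time), or a conventional linear-time merge.
+            // The MERGE macro applies the multiply and the IMIN add, and
+            // cij_exists records whether any matching pair of entries was
+            // found, since only then is C(i,j) appended to the pattern.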
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp32.c new file mode 100644 index 0000000000..38cb189206 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_fp32 +// A'*B function: GB_AdotB__min_iseq_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
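+    // For this semiring the multiply is ISEQ, t = (aik == bkj), which is
+    // 1 when the two values are equal and 0 otherwise, and the add is
+    // FMIN with identity INFINITY.
+    //
+    // A small worked example (illustrative only): if A(i,k1) = 2.0 and
+    // B(k1,j) = 2.0, while A(i,k2) = 5.0 and B(k2,j) = 7.0, the two
+    // contributions are t1 = 1 and t2 = 0, so
+    //
+    //      cij = FMIN (FMIN (INFINITY, 1), 0) = 0 .
+    //
+    // C(i,j) is therefore 1 only when every overlapping pair of entries
+    // is equal, and 0 as soon as any pair differs.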
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik == bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki == bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp64.c new file mode 100644 index 0000000000..fcbf64d7a3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_fp64 +// A'*B function: GB_AdotB__min_iseq_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
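+ // Editorial illustration (not part of the generated template): with the ISEQ
+ // multiply and MIN monoid above, each product is t = (aik == bkj), i.e. 1 when
+ // the two entries are equal and 0 otherwise, and the monoid keeps the minimum.
+ // For example, if A(i,k1) = 2, B(k1,j) = 2 and A(i,k2) = 3, B(k2,j) = 5 both
+ // contribute to C(i,j), then t1 = 1, t2 = 0 and C(i,j) = FMIN (1,0) = 0; the
+ // result is 1 only when every pair of entries that meet in the sum are equal.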
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik == bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki == bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int16.c new file mode 100644 index 0000000000..fd97466d7d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_int16 +// A'*B function: GB_AdotB__min_iseq_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
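+ // Editorial note: w is a dense scratch vector taken from the thread-local
+ // workspace. The outer-product (Gustavson-style) loops below accumulate each
+ // column C(:,j) into w and then gather the finished values back into Cx, so w
+ // is only ever read at positions listed in the pattern of C(:,j) or the Mask.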
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
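+ // Editorial note: when WITH_ZOMBIES is not defined, Cp [j] was written at the
+ // top of each iteration above, so only the final Cp [n] = cnz remains to be
+ // set; when WITH_ZOMBIES is defined, all of C->p was copied from Maskp before
+ // the loop and C keeps the full Mask pattern.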
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int32.c new file mode 100644 index 0000000000..81d328e7de --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_int32 +// A'*B function: GB_AdotB__min_iseq_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
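+ // Editorial note on the Flag workspace used below: Flag [i] == 0 means
+ // Mask(i,j) is not present (or is false), a positive value means Mask(i,j) is
+ // present but C(i,j) has not been started yet, and -1 means C(i,j) has been
+ // started and w [i] holds its current value; the flags are reset to zero as
+ // C(:,j) is gathered.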
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int64.c new file mode 100644 index 0000000000..933b1f39fd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_int64 +// A'*B function: GB_AdotB__min_iseq_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
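+ // Editorial note: this kernel has two paths. If a Mask is given, its pattern
+ // is taken as the pattern of C, and Mask entries that receive no contribution
+ // from A*B are either dropped or (with WITH_ZOMBIES) kept as zombie
+ // placeholders. Otherwise the pattern of C was already computed by
+ // GB_AxB_symbolic and only the numerical values are filled in here.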
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int8.c new file mode 100644 index 0000000000..a30efb8924 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_int8 +// A'*B function: GB_AdotB__min_iseq_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
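+ // Editorial note: the flip argument is not referenced in this kernel; the ISEQ
+ // multiply (aik == bkj) is symmetric in its operands, so swapping A and B does
+ // not change the products, which appears to be why the generated code can
+ // ignore it.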
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint16.c new file mode 100644 index 0000000000..2ccac2c52c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_uint16 +// A'*B function: GB_AdotB__min_iseq_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
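+    // (Editorial note, not part of the generated template.)  In the masked
+    // branch below, w holds the partial MIN reductions for the current
+    // column C(:,j), and the int8_t Flag workspace acts as a small per-row
+    // state machine: Flag [i] == 0 means Mask(i,j) is not present or false
+    // (skip the work), Flag [i] > 0 means Mask(i,j) is true but no product
+    // has been written to w [i] yet, and Flag [i] == -1 means w [i] already
+    // holds a partial result and is updated with IMIN.  The gather phase
+    // resets Flag to zero so the workspace can be reused for the next
+    // column.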
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
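+        // (Editorial note, not part of the generated template.)  The two
+        // compile-time variants below finish the masked case differently.
+        // With WITH_ZOMBIES, C was given the Mask's pattern up front, so
+        // any Mask entry that received no product becomes a zombie: its row
+        // index is stored as FLIP (i), C->nzombies is incremented, and
+        // GB_queue_insert places C in the queue of matrices with pending
+        // work.  Without WITH_ZOMBIES, the pattern of C is built
+        // incrementally in Cp and Ci, and only needs Cp [n] = cnz to close
+        // the last column.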
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint32.c new file mode 100644 index 0000000000..3a45a16c3b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_uint32 +// A'*B function: GB_AdotB__min_iseq_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
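+    // (Editorial note, not part of the generated template.)  The masked
+    // loop below prunes work in three cheap steps before touching any
+    // values: a column j is skipped when Mask(:,j) has no entries, A(:,k)
+    // is skipped when it is empty or when its first/last row indices
+    // [alo,ahi] cannot overlap the Mask's row range [mlo,mhi], and
+    // Mask(:,j) is scattered into the Flag workspace lazily, only once a
+    // surviving A(:,k) is found (tracked by the "marked" flag).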
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
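+        // (Editorial note, not part of the generated template.)  The else
+        // branch further below handles the unmasked case: the pattern of C
+        // has already been computed by GB_AxB_symbolic, so each column can
+        // initialize w to the MIN identity (UINT32_MAX) only at the
+        // positions recorded in Ci for that column, apply the same
+        // column-at-a-time update over B(:,j), and gather w back into Cx
+        // with no Flag bookkeeping at all.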
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint64.c new file mode 100644 index 0000000000..e65478a3b2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_uint64 +// A'*B function: GB_AdotB__min_iseq_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
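+    // (Editorial note, not part of the generated template.)  The companion
+    // dot-product kernel GB_AdotB__min_iseq_uint64, defined later in this
+    // file, chooses among six cases for each C(i,j): both A(:,i) and B(:,j)
+    // dense, one dense and the other sparse (two cases), one vector more
+    // than 32 times denser than the other (two cases, where the merge
+    // advances through the denser vector with GB_BINARY_TRIM_SEARCH instead
+    // of one entry at a time), and a plain two-pointer merge otherwise.
+    // The three merge cases share the MERGE macro, which applies the ISEQ
+    // multiply and MIN add when the row indices match.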
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint8.c new file mode 100644 index 0000000000..5dd2eb0b99 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_iseq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_iseq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_iseq_uint8 +// A'*B function: GB_AdotB__min_iseq_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
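+    // (Editorial note, not part of the generated template.)  A quick check
+    // of the identity claim above: the ISEQ multiply only ever produces
+    // t = 0 or t = 1, and IMIN (UINT8_MAX, t) == t for either value, so
+    // starting w (or cij in the dot-product kernel) at UINT8_MAX never
+    // changes the reduction; it simply marks "no product seen yet".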
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_fp32.c new file mode 100644 index 0000000000..c59b81bc3a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_fp32 +// A'*B function: GB_AdotB__min_isge_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
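+ // Illustrative example of the MIN_ISGE semiring (added comment, not part of
+ // the generated template): each "multiply" t = (aik >= bkj) yields 0 or 1,
+ // and the "add" keeps the minimum, so an entry C(i,j) is 1 only when
+ // aik >= bkj holds for every term that contributes to it, and 0 otherwise.
+ // For instance, if the contributing pairs are (aik,bkj) = (1,2) and (5,3),
+ // the products are t = 0 and t = 1, and cij = FMIN (0,1) = 0.  The identity
+ // is INFINITY, since cij = FMIN (cij,INFINITY) does not change cij.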
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik >= bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_fp64.c new file mode 100644 index 0000000000..630fb76bd4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_fp64 +// A'*B function: GB_AdotB__min_isge_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
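+ // Illustrative summary of the masked branch below (added comment): Flag [i]
+ // encodes the state of row i for the current column j.  Flag [i] == 0 means
+ // Mask (i,j) is not set, so the entry is skipped; Flag [i] > 0 means the
+ // mask allows C(i,j) but nothing has been accumulated yet, so the first
+ // product is stored with w [i] = t and Flag [i] is set to -1; and
+ // Flag [i] < 0 means C(i,j) already has a partial value in w [i], which is
+ // updated with w [i] = FMIN (w [i],t).  The gather step resets Flag [i] to
+ // zero so the workspace can be reused for the next column.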
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik >= bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int16.c new file mode 100644 index 0000000000..90f7b76586 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_int16 +// A'*B function: GB_AdotB__min_isge_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
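+ // Illustrative summary of GB_AdotB__min_isge_int16 below (added comment):
+ // each dot product A(:,i)'*B(:,j) picks one of five strategies based on the
+ // entry counts ainz and bjnz: both columns dense (one loop over all nrows
+ // rows), one dense and one sparse (loop over the sparse pattern and index
+ // directly into the dense column), or both sparse (a merge of the two
+ // sorted index lists).  When one column has more than 32 times the entries
+ // of the other, the merge leapfrogs through the longer index list with
+ // GB_BINARY_TRIM_SEARCH instead of advancing one entry at a time.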
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int32.c new file mode 100644 index 0000000000..8350ba052a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_int32 +// A'*B function: GB_AdotB__min_isge_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
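+ // Illustrative summary of the WITH_ZOMBIES variant (added comment): when
+ // zombies are enabled, C takes its pattern directly from the Mask (Maskp is
+ // copied into C->p above), and any Mask entry for which A*B produced no
+ // value becomes a zombie: its value is set to the identity, its row index
+ // is stored as FLIP (i), and C->nzombies is incremented.  GB_queue_insert
+ // then places C in the queue of matrices with pending work so the zombies
+ // can be pruned later.  Without zombies, only the live entries are appended
+ // and Cp is built column by column.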
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int64.c new file mode 100644 index 0000000000..108591f134 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_int64 +// A'*B function: GB_AdotB__min_isge_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
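+ // Illustrative note on these generated kernels (added comment): each
+ // GB_AxB__<add>_<mult>_<type>.c file provides two hard-coded workers for
+ // one built-in semiring: GB_AxB__* computes C=A*B column by column with a
+ // dense workspace w, and GB_AdotB__* computes C=A'*B with per-entry dot
+ // products.  The whole file is excluded when GBCOMPACT is defined (see the
+ // #ifndef GBCOMPACT guard above), which presumably trades the speed of
+ // these type-specific kernels for a much smaller compiled library by
+ // falling back on generic, type-agnostic multiply code.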
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int8.c new file mode 100644 index 0000000000..650a40016f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_int8 +// A'*B function: GB_AdotB__min_isge_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
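+    // Illustrative sketch (not part of the generated kernel): at the scalar
+    // level this semiring replaces (+,*) arithmetic with (min, >=).  Every
+    // "multiply" is the 0/1 comparison
+    //     t = (aik >= bkj) ;
+    // and every "add" folds t into the result with
+    //     cij = IMIN (cij, t) ;
+    // starting from the additive identity INT8_MAX.  For instance, if two
+    // terms contribute, A(i,k1)=2 with B(k1,j)=3 and A(i,k2)=5 with B(k2,j)=4,
+    // then t1 = (2>=3) = 0, t2 = (5>=4) = 1, and
+    //     cij = IMIN (IMIN (INT8_MAX, 0), 1) = 0,
+    // so an entry of C is 1 only when every contributing comparison holds.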
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint16.c new file mode 100644 index 0000000000..cb8b59f1a1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_uint16 +// A'*B function: GB_AdotB__min_isge_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
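+    // Summary sketch (descriptive only): the GB_AdotB dot-product kernels in
+    // these generated files choose one of five strategies for each
+    // cij = A(:,i)'*B(:,j), based on the entry counts ainz and bjnz:
+    //   (1) both columns dense: index both directly by k;
+    //   (2),(3) one column dense: walk the sparse column, index the dense one;
+    //   (4),(5) one column more than 32x denser: walk the sparser column and
+    //       skip ahead in the denser one with GB_BINARY_TRIM_SEARCH, a binary
+    //       search over its remaining entries;
+    //   otherwise: a plain two-pointer merge of the two sorted index lists.
+    // In every case the MERGE macro applies the semiring multiply and the
+    // IMIN accumulation, setting cij_exists at the first matched index.  The
+    // 32x cutoff is a heuristic: a binary search costs O(log n) per probe, so
+    // it only pays off when one index list is far shorter than the other.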
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
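+        // Descriptive note on the masked loop above: Flag [i] acts as a
+        // per-row marker for the current column j:
+        //     0 : Mask(i,j) is absent or false, so C(i,j) cannot exist;
+        //     1 : Mask(i,j) is true but no term of A*B has contributed yet;
+        //    -1 : Mask(i,j) is true and w [i] holds a partial result.
+        // When WITH_ZOMBIES is defined, entries that are in the Mask but
+        // receive no value are kept in the pattern as zombies: their row
+        // index is stored as FLIP (i) and C->nzombies is incremented, so they
+        // can be deleted later, outside this kernel.  GB_queue_insert (just
+        // below) then records that C carries such pending work.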
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint32.c new file mode 100644 index 0000000000..80385b5402 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_uint32 +// A'*B function: GB_AdotB__min_isge_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
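+    // Descriptive note: in the unmasked branch of these kernels the pattern
+    // of C has already been computed by GB_AxB_symbolic, so each column is
+    // built by a classic gather/scatter pass (in the style of Gustavson's
+    // algorithm): the workspace entries w [Ci [p]] covering the column's
+    // pattern are first set to the additive identity, then each B(k,j)
+    // scatters t = (A(i,k) >= B(k,j)) into w [i] via IMIN, and the column is
+    // finally gathered back into Cx in pattern order.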
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint64.c new file mode 100644 index 0000000000..ebd96dd7a5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_uint64 +// A'*B function: GB_AdotB__min_isge_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
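+    // Note (an inference, not stated in this file): the helpers empty,
+    // scatter_mask, jinit, and cij_init called by these kernels are not
+    // defined here; they appear to be the shared inline routines from
+    // GB_AxB_methods.h (#included above), which factor out the pattern and
+    // mask bookkeeping common to every generated semiring.  Likewise,
+    // cast_Mask comes from GB_cast_factory and converts a Mask entry of any
+    // built-in type to boolean, so a matrix of any type can serve as the Mask.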
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint8.c new file mode 100644 index 0000000000..a8e42150fb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isge_uint8 +// A'*B function: GB_AdotB__min_isge_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
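+    // Note (partly an inference): the workspace w declared just below and the
+    // Flag array both point into GB_thread_local, a per-thread scratch area
+    // that appears to be allocated once outside this kernel and reused across
+    // calls.  That is why w is documented above as uninitialized, and why the
+    // kernels reset only what they touch: the positions in each column's
+    // pattern for w, and the scattered entries of Flag once each masked
+    // column has been gathered.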
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
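+        // [editorial note]  In the WITH_ZOMBIES variant above, every entry
+        // of Mask(:,j) stays in the pattern of C: an entry that received a
+        // contribution from A*B is stored normally, while one that did not
+        // becomes a "zombie" whose value is the identity UINT8_MAX and
+        // whose row index is stored as FLIP (i), with C->nzombies
+        // incremented.  GB_queue_insert below then records that C has
+        // pending work so the zombies can be pruned before C is next used
+        // (GB_wait is the likely consumer).  This reading is inferred from
+        // the surrounding code, not from separate documentation.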
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp32.c new file mode 100644 index 0000000000..6b5547c182 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_fp32 +// A'*B function: GB_AdotB__min_isgt_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
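+    // [editorial note]  In this fp32 instance the "multiply" t = (aik > bkj)
+    // evaluates to exactly 0.0f or 1.0f, and the monoid is FMIN with
+    // identity INFINITY, so FMIN (cij, INFINITY) leaves cij unchanged.
+    // The workspace w holds one column of C at a time: in the unmasked
+    // branch it is cleared to INFINITY over the pattern of C(:,j),
+    // accumulated into by the saxpy-style loops, and then gathered back
+    // into Cx.  This is a restatement of what the code below does, not
+    // additional behavior.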
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik > bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki > bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp64.c new file mode 100644 index 0000000000..3b25d7b62b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_fp64 +// A'*B function: GB_AdotB__min_isgt_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
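+    // [editorial note]  In the masked branch below, Flag holds one int8_t
+    // per row of C.  scatter_mask (presumably declared in
+    // GB_AxB_methods.h, which this file includes) marks the rows present
+    // in Mask(:,j); a value of 0 means the row is not in the mask, a
+    // positive value means it is in the mask but C(i,j) has not yet been
+    // seen, and the first contribution flips it to -1 while initializing
+    // w [i].  The gather loop resets Flag to 0 so the workspace can be
+    // reused for the next column.  This reading is inferred from the code
+    // itself.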
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik > bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki > bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int16.c new file mode 100644 index 0000000000..6e8525d60c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_int16 +// A'*B function: GB_AdotB__min_isgt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
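+    // [editorial note]  The companion dot-product kernel GB_AdotB__* in
+    // each of these generated files computes C=A'*B one column at a time
+    // and distinguishes five cases for A(:,i) and B(:,j): both dense, one
+    // dense and the other sparse (two cases), one far sparser than the
+    // other (the ainz > 32*bjnz and bjnz > 32*ainz tests), and comparable
+    // sparsity.  In the very unbalanced cases the shorter index list
+    // drives a binary search (GB_BINARY_TRIM_SEARCH) into the longer one
+    // instead of a linear merge; the factor 32 is a heuristic threshold
+    // baked into the template.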
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int32.c new file mode 100644 index 0000000000..2303122e68 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_int32 +// A'*B function: GB_AdotB__min_isgt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
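+    // [editorial note]  The MERGE macro used in the dot-product kernels
+    // handles a shared row index k of A(:,i) and B(:,j): it applies the
+    // multiply to the pair, then either starts cij (on the first match)
+    // or folds the result in with IMIN, advancing both pa and pb.  Since
+    // cij_exists starts false, C(i,j) is appended to the pattern only
+    // when at least one such intersection was found.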
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int64.c new file mode 100644 index 0000000000..6b024bc893 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_int64 +// A'*B function: GB_AdotB__min_isgt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
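+    // This kernel computes C=A*B one column at a time in a saxpy-style
+    // (Gustavson-like) sweep: each entry B(k,j) scales the sparse column
+    // A(:,k) with the multiplier t = (aik > bkj), and t is folded into the
+    // dense workspace w with the MIN monoid.  The workspace accumulates one
+    // column of C and is then gathered back into C(:,j) (or into the Mask
+    // pattern, when a Mask is present).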
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int8.c new file mode 100644 index 0000000000..70e22adb74 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_int8 +// A'*B function: GB_AdotB__min_isgt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
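+    // For the ISGT multiplier the product t = (aik > bkj) is always 0 or 1,
+    // so the MIN monoid reduction gives C(i,j) = 1 only when aik > bkj holds
+    // for every pair that contributes to C(i,j), and C(i,j) = 0 otherwise.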
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint16.c new file mode 100644 index 0000000000..0ce4efef6f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_uint16 +// A'*B function: GB_AdotB__min_isgt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
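+    // A small worked example of this semiring (the values are chosen here
+    // purely for illustration): if C(4,j) receives the two contributions
+    // A(4,1)*B(1,j) with A(4,1)=7, B(1,j)=5 and A(4,2)*B(2,j) with A(4,2)=2,
+    // B(2,j)=9, the multiplier gives t = (7>5) = 1 and t = (2>9) = 0, and
+    // the MIN monoid reduces them to C(4,j) = IMIN (1,0) = 0.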
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
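+        // finalize C: when WITH_ZOMBIES is defined, C keeps the Mask pattern
+        // and is handed to GB_queue_insert below (queued if it has zombies);
+        // otherwise the column pointers are closed off with Cp [n] = cnz.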
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
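+                // the pattern of C is built on the fly here: each dot
+                // product that merges at least one pair is appended as the
+                // next entry of C(:,j)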
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint32.c new file mode 100644 index 0000000000..bdb99a32c6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_uint32 +// A'*B function: GB_AdotB__min_isgt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
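+    // Two cases follow: if a Mask is present, the pattern of C is taken
+    // from the Mask (with zombies when WITH_ZOMBIES is defined); otherwise
+    // the pattern of C was computed beforehand by GB_AxB_symbolic and only
+    // the numerical values are filled in here.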
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
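+        // Flag [] has been reset to zero for every Mask entry scattered
+        // above, so the workspace is clean for the next column j and for
+        // the next caller.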
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
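+                // append the result to C; if cij_exists were false, C(i,j)
+                // would simply not be created (this dot-product kernel makes
+                // no zombies)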
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint64.c new file mode 100644 index 0000000000..704ddd60f3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_uint64 +// A'*B function: GB_AdotB__min_isgt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
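+    // In the unmasked path below, w is first set to UINT64_MAX, the identity
+    // of the MIN monoid, so the first IMIN with a multiplier t simply yields
+    // t and no special case is needed for the first contribution to C(i,j).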
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint8.c new file mode 100644 index 0000000000..fa73f4db7c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isgt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isgt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isgt_uint8 +// A'*B function: GB_AdotB__min_isgt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
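+    // the restrict qualifiers below assert that w, Cx, Ax, and Bx do not
+    // alias one another, which can help the compiler optimize the loops.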
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_fp32.c new file mode 100644 index 0000000000..31b1c2c1ae --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_fp32 +// A'*B function: GB_AdotB__min_isle_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
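+    // the ISLE multiplier yields 0 or 1 (stored as float), so the FMIN
+    // reduction leaves C(i,j) equal to 1 only when aik <= bkj holds at every
+    // k where both A(i,k) and B(k,j) are present, and 0 otherwise.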
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
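        // a zombie is an entry present in the Mask but not in A*B: its row
+        // index is stored as FLIP (i) and its value as the monoid identity,
+        // and it is deleted later when C's pending work is finished.
+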
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik <= bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_fp64.c new file mode 100644 index 0000000000..f2436ea446 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_fp64 +// A'*B function: GB_AdotB__min_isle_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
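+    // GB_thread_local.Work is a reusable scratch buffer; each generated
+    // kernel views it as its own Z type (double here), and only the entries
+    // written below are ever read back.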
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik <= bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int16.c new file mode 100644 index 0000000000..fe080f504c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_int16 +// A'*B function: GB_AdotB__min_isle_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
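        // finalize C for this branch: when WITH_ZOMBIES is defined, C keeps
+        // the Mask's pattern and is queued so its zombies can be cleaned up
+        // later; otherwise Cp [n] records the number of live entries in C.
+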
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int32.c new file mode 100644 index 0000000000..cbe66a22c4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_int32 +// A'*B function: GB_AdotB__min_isle_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
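+    // Note: for this MIN_ISLE semiring, each "multiply" is the comparison
+    // (aik <= bkj), stored in int32_t as 0 or 1, and the "add" folds those
+    // results with IMIN starting from the identity INT32_MAX.  A minimal
+    // sketch of one fully dense dot product, assuming hypothetical dense
+    // arrays a and b of length nrows (illustration only):
+    //
+    //      int32_t cij = INT32_MAX ;               // MIN identity
+    //      for (int64_t k = 0 ; k < nrows ; k++)
+    //      {
+    //          int32_t t = (a [k] <= b [k]) ;      // ISLE multiply: 0 or 1
+    //          cij = IMIN (cij, t) ;               // MIN additive operator
+    //      }
+    //      // cij is 1 only if a [k] <= b [k] holds for every k
+    //
+    // The sparse kernels below compute the same reduction, restricted to the
+    // entries that are actually present.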
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
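+        // In the WITH_ZOMBIES build, C keeps the full pattern of the Mask:
+        // any Mask entry that received no contribution from A*B was stored
+        // above as a "zombie" (row index FLIP (i), C->nzombies incremented)
+        // so it can be pruned later; GB_queue_insert below records C in the
+        // queue of matrices with pending work so that a subsequent wait can
+        // delete those zombies.  Without zombies, the column pointers are
+        // finalized here instead, by setting Cp [n] = cnz.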
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int64.c new file mode 100644 index 0000000000..1fb5318cbb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_int64 +// A'*B function: GB_AdotB__min_isle_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
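+    // In the masked branch below, the int8_t Flag workspace encodes three
+    // states per row index i for the current column j:
+    //      Flag [i] == 0 : Mask (i,j) is not present (or is false)
+    //      Flag [i] >  0 : Mask (i,j) is true, but C (i,j) has no value yet
+    //      Flag [i] <  0 : Mask (i,j) is true and w [i] holds C (i,j)
+    // scatter_mask sets the positive entries, the first contribution to
+    // C (i,j) flips Flag [i] to -1, and the gather phase resets Flag to zero
+    // so it is clean for the next column.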
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int8.c new file mode 100644 index 0000000000..dc14bd7b67 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_int8 +// A'*B function: GB_AdotB__min_isle_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
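+    // On the companion dot product kernel, GB_AdotB__min_isle_int8, defined
+    // later in this file: each C(i,j) is computed by merging the index lists
+    // of A(:,i) and B(:,j).  Fully dense vectors use direct indexing; when
+    // one list has more than 32 times the entries of the other, the longer
+    // list is advanced with GB_BINARY_TRIM_SEARCH, so the intersection costs
+    // roughly O(shorter * log(longer)) rather than O(longer); otherwise a
+    // plain two-pointer merge is used.  The factor of 32 is a heuristic
+    // crossover point, not an exact optimum.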
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint16.c new file mode 100644 index 0000000000..dcd2d31829 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_uint16 +// A'*B function: GB_AdotB__min_isle_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
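+    // In the unmasked branch below, the pattern of C has already been
+    // computed by GB_AxB_symbolic, so before each column is computed, w is
+    // reset to the identity UINT16_MAX only at the positions listed in
+    // C(:,j); the numeric phase touches no other positions, and the gather
+    // reads those same positions back.  The dense workspace w is never
+    // cleared in full.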
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint32.c new file mode 100644 index 0000000000..ed368b5ef1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_uint32 +// A'*B function: GB_AdotB__min_isle_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
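+    // In the masked branch below, empty appears to report whether a column
+    // is empty and, if not, its lowest and highest row indices.  Those
+    // bounds let the kernel skip A(:,k) entirely when its row range cannot
+    // overlap the row range of Mask(:,j) (ahi < mlo || alo > mhi), and they
+    // defer scatter_mask until a column of A can actually contribute.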
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint64.c new file mode 100644 index 0000000000..f55baa153a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_uint64 +// A'*B function: GB_AdotB__min_isle_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
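// Aside (not part of the generated file): the sparse/sparse branch of the dot
// product walks two sorted index lists and applies the semiring only where the
// row indices match, which is what the MERGE macro above performs. The sketch
// below mirrors that merge for uint64 data with illustrative values; the
// helper name and its calling convention are invented for this example only.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static bool dot_merge_uint64 (const int64_t *Ai, const uint64_t *Ax,
    int64_t pa, int64_t pa_end, const int64_t *Bi, const uint64_t *Bx,
    int64_t pb, int64_t pb_end, uint64_t *cij_out)
{
    bool cij_exists = false ;
    uint64_t cij = 0 ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib) pa++ ;                 // A(ia,i) has no matching B entry
        else if (ib < ia) pb++ ;            // B(ib,j) has no matching A entry
        else
        {
            uint64_t t = (Ax [pa++] <= Bx [pb++]) ;         // ISLE multiply
            cij = cij_exists ? ((cij < t) ? cij : t) : t ;  // MIN add
            cij_exists = true ;
        }
    }
    *cij_out = cij ;
    return (cij_exists) ;           // false: C(i,j) is not in the pattern
}

int main (void)
{
    int64_t  Ai [3] = { 0, 2, 5 } ;  uint64_t Ax [3] = { 4, 1, 7 } ;
    int64_t  Bi [2] = { 2, 5 } ;     uint64_t Bx [2] = { 3, 9 } ;
    uint64_t cij ;
    if (dot_merge_uint64 (Ai, Ax, 0, 3, Bi, Bx, 0, 2, &cij))
    {
        // matches at rows 2 and 5: (1 <= 3) and (7 <= 9), so cij = 1
        printf ("cij = %llu\n", (unsigned long long) cij) ;
    }
    return (0) ;
}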
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint8.c new file mode 100644 index 0000000000..c04eba74f2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isle_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isle_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isle_uint8 +// A'*B function: GB_AdotB__min_isle_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
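// Aside (not part of the generated file): when one column has more than 32x
// the entries of the other, the kernel above skips ahead in the denser index
// list with a trimmed binary search instead of stepping one entry at a time.
// The helper below is a plain lower-bound search used only to illustrate the
// idea; the real GB_BINARY_TRIM_SEARCH macro may differ in detail, so this
// reading is an assumption, not the library's definition.

#include <stdint.h>
#include <stdio.h>

static int64_t lower_bound_int64 (int64_t target, const int64_t *Ai,
    int64_t pleft, int64_t pright)          // search Ai [pleft..pright-1]
{
    while (pleft < pright)
    {
        int64_t pmid = pleft + (pright - pleft) / 2 ;
        if (Ai [pmid] < target) pleft = pmid + 1 ;
        else pright = pmid ;
    }
    return (pleft) ;        // first position with Ai [pleft] >= target
}

int main (void)
{
    int64_t Ai [8] = { 0, 1, 3, 4, 7, 9, 12, 15 } ;
    // skip over all entries with row index < 9: lands on position 5
    printf ("%lld\n", (long long) lower_bound_int64 (9, Ai, 0, 8)) ;
    return (0) ;
}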
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_fp32.c new file mode 100644 index 0000000000..2a348b86f0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_fp32 +// A'*B function: GB_AdotB__min_islt_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
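// Aside (not part of the generated file): in the floating-point variant above,
// the ISLT multiply (aik < bkj) yields 0.0f or 1.0f and the additive identity
// is INFINITY, so FMIN (cij, INFINITY) leaves cij unchanged. The sketch below
// assumes FMIN behaves like fminf; the real macro may treat NaN differently.

#include <math.h>
#include <stdio.h>

static float min_islt_fp32_dot (const float *a, const float *b, int n)
{
    float cij = INFINITY ;                  // identity of the MIN monoid
    for (int k = 0 ; k < n ; k++)
    {
        float t = (a [k] < b [k]) ;         // ISLT multiply: 0.0f or 1.0f
        cij = fminf (cij, t) ;              // MIN add
    }
    return (cij) ;
}

int main (void)
{
    float a [3] = { 1.0f, 2.0f, 3.0f } ;
    float b [3] = { 2.0f, 2.5f, 3.5f } ;
    // every a[k] < b[k], so the result is 1
    printf ("%g\n", min_islt_fp32_dot (a, b, 3)) ;
    return (0) ;
}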
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik < bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki < bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_fp64.c new file mode 100644 index 0000000000..da24acda90 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_fp64 +// A'*B function: GB_AdotB__min_islt_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
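// Aside (not part of the generated file): the masked outer-product phase above
// scatters the pattern of Mask(:,j) into the Flag workspace (Flag[i] = 1),
// keeps each A(i,k)*B(k,j) contribution only where Flag[i] != 0, stores the
// first contribution into w[i] while marking Flag[i] = -1, and folds later
// ones in with the MIN monoid. The toy version below uses dense 1-D arrays and
// hard-coded contributions purely to trace that Flag/w protocol.

#include <stdint.h>
#include <stdio.h>

#define N 5

int main (void)
{
    int8_t  Flag [N] = { 0 } ;
    double  w    [N] ;
    int64_t Maski [2] = { 1, 3 } ;      // Mask(:,j) has entries in rows 1 and 3

    // scatter the mask pattern into Flag
    for (int p = 0 ; p < 2 ; p++) Flag [Maski [p]] = 1 ;

    // two contributions to row 1, one to row 3, one to the unmasked row 4
    struct { int64_t i ; double t ; } contrib [4] =
        { { 1, 1.0 }, { 3, 0.0 }, { 1, 0.0 }, { 4, 1.0 } } ;
    for (int p = 0 ; p < 4 ; p++)
    {
        int64_t i = contrib [p].i ;
        if (Flag [i] == 0) continue ;                   // not in the mask: skip
        double t = contrib [p].t ;
        if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = t ; }       // first hit
        else              { w [i] = (w [i] < t) ? w [i] : t ; } // MIN fold
    }

    // gather: only rows with Flag[i] < 0 hold live entries of C(:,j)
    for (int64_t i = 0 ; i < N ; i++)
    {
        if (Flag [i] < 0)
        {
            printf ("C(%lld,j) = %g\n", (long long) i, w [i]) ;
        }
    }
    return (0) ;        // prints C(1,j) = 0 and C(3,j) = 0
}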
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik < bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki < bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int16.c new file mode 100644 index 0000000000..ed20715c8d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_int16 +// A'*B function: GB_AdotB__min_islt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
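// The MIN_ISLT semiring described in the header above reduces to two scalar
// operations: the "multiply" ISLT yields 1 when aik < bkj and 0 otherwise,
// and the "add" is MIN with identity INT16_MAX.  The following standalone
// sketch exercises exactly those scalar operations; it is illustrative only
// and is not part of the generated kernel (the helper names islt_int16 and
// min_int16 are made up).

#include <stdio.h>
#include <stdint.h>

// "multiply": ISLT returns 1 if a < b, else 0, in the operand type
static int16_t islt_int16 (int16_t a, int16_t b) { return (int16_t) (a < b) ; }

// "add": MIN, whose identity is INT16_MAX since min (c, INT16_MAX) == c
static int16_t min_int16 (int16_t a, int16_t b) { return (a < b) ? a : b ; }

int main (void)
{
    // dot product of a = [3 7 2] and b = [5 1 9] under MIN_ISLT
    int16_t a [3] = { 3, 7, 2 } ;
    int16_t b [3] = { 5, 1, 9 } ;
    int16_t cij = INT16_MAX ;                   // the additive identity
    for (int k = 0 ; k < 3 ; k++)
    {
        cij = min_int16 (cij, islt_int16 (a [k], b [k])) ;
    }
    // terms are (3<5)=1, (7<1)=0, (2<9)=1, so cij = min (1,0,1) = 0
    printf ("cij = %d\n", (int) cij) ;
    return (0) ;
}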
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int32.c new file mode 100644 index 0000000000..93018eae6f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_int32 +// A'*B function: GB_AdotB__min_islt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
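// The unmasked branch of the kernel below follows Gustavson's method: for
// each column j, the dense workspace w accumulates the "sum" over k of
// A(:,k) "times" B(k,j), and the result is then gathered into C(:,j) using
// the pattern computed earlier by GB_AxB_symbolic.  The standalone sketch
// below shows the same scatter/gather structure for one column of a tiny
// CSC matrix, with a dense output column instead of a precomputed pattern;
// the array names and sizes are made up and are not the library's API.

#include <stdio.h>
#include <stdint.h>

int main (void)
{
    // A is 4-by-3 in CSC form: Ap = column pointers, Ai = row indices, Ax = values
    int64_t Ap [4] = { 0, 2, 4, 5 } ;
    int64_t Ai [5] = { 0, 2, 1, 3, 2 } ;
    int32_t Ax [5] = { 4, 1, 7, 2, 9 } ;

    // one sparse column b = B(:,j), with entries in rows 0 and 2
    int64_t Bi [2] = { 0, 2 } ;
    int32_t Bx [2] = { 5, 3 } ;

    // dense workspace, cleared to the additive identity of MIN
    int32_t w [4] = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX } ;

    for (int64_t p = 0 ; p < 2 ; p++)
    {
        int64_t k = Bi [p] ;                    // B(k,j) is present
        int32_t bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;
            int32_t t = (Ax [pa] < bkj) ;       // "multiply": ISLT
            w [i] = (w [i] < t) ? w [i] : t ;   // "add": MIN
        }
    }

    // gather the dense column (the real kernel gathers only the entries in
    // the symbolic pattern of C(:,j))
    for (int64_t i = 0 ; i < 4 ; i++)
    {
        printf ("C(%d,j) = %d\n", (int) i, (int) w [i]) ;
    }
    return (0) ;
}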
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int64.c new file mode 100644 index 0000000000..31ccc833ae --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_int64 +// A'*B function: GB_AdotB__min_islt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
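// In the masked branch below, when WITH_ZOMBIES is defined, Mask entries
// that receive no contribution from A*B are kept in C's pattern as
// "zombies": the row index is stored as FLIP (i), the value is set to the
// additive identity, C->nzombies is incremented, and the matrix is placed
// on the queue by GB_queue_insert so the zombies can be pruned later.  The
// sketch below only illustrates the idea of an invertible "flipped index"
// encoding; the real FLIP/UNFLIP macros live in GB.h and may use a
// different formula, so the one shown here is an assumption.

#include <stdio.h>
#include <stdint.h>

// hypothetical encoding: map a nonnegative index i to a unique negative value
static int64_t flip_index   (int64_t i) { return (-i - 2) ; }
static int64_t unflip_index (int64_t i) { return (i < 0) ? (-i - 2) : i ; }
static int     is_zombie    (int64_t i) { return (i < 0) ; }

int main (void)
{
    int64_t i = 5 ;
    int64_t z = flip_index (i) ;                // stored row index of a zombie
    printf ("flipped: %lld, zombie: %d, unflipped: %lld\n",
        (long long) z, is_zombie (z), (long long) unflip_index (z)) ;
    return (0) ;
}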
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int8.c new file mode 100644 index 0000000000..fdc0f82997 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_int8 +// A'*B function: GB_AdotB__min_islt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
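// The A'*B dot-product kernel later in this file (GB_AdotB__min_islt_int8)
// intersects the row patterns of A(:,i) and B(:,j).  When the two columns
// have similar sparsity it uses a two-pointer merge; when one column has
// more than 32 times as many entries as the other, it advances through the
// denser column with a trimmed binary search (GB_BINARY_TRIM_SEARCH)
// instead of stepping one entry at a time.  The standalone sketch below
// shows only the two-pointer merge over two sorted index lists, applying
// the MIN_ISLT semiring to the matching entries; names and data are made up.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

int main (void)
{
    // sorted row indices and values of A(:,i) and B(:,j)
    int64_t Ai [4] = { 1, 3, 4, 7 } ; int8_t Ax [4] = { 2, 5, 1, 9 } ;
    int64_t Bi [3] = { 3, 4, 8 } ;    int8_t Bx [3] = { 6, 0, 4 } ;

    bool cij_exists = false ;
    int8_t cij = 0 ;
    int64_t pa = 0, pb = 0 ;
    while (pa < 4 && pb < 3)
    {
        if      (Ai [pa] < Bi [pb]) pa++ ;
        else if (Bi [pb] < Ai [pa]) pb++ ;
        else
        {
            // both columns have an entry in row k = Ai [pa] == Bi [pb]
            int8_t t = (int8_t) (Ax [pa] < Bx [pb]) ;       // ISLT
            cij = cij_exists ? ((cij < t) ? cij : t) : t ;  // MIN
            cij_exists = true ;
            pa++ ; pb++ ;
        }
    }
    // matches occur in rows 3 and 4, giving cij = min (1, 0) = 0
    if (cij_exists) printf ("C(i,j) = %d\n", (int) cij) ;
    else            printf ("C(i,j) is not present\n") ;
    return (0) ;
}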
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint16.c new file mode 100644 index 0000000000..ce7b01f4b5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_uint16 +// A'*B function: GB_AdotB__min_islt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
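// In the masked branch below, the int8_t Flag workspace carries three states
// per row: 0 means the row is not marked by Mask(:,j); +1 means it is marked
// (scatter_mask casts the mask value to boolean) but C(i,j) has not been
// computed yet; -1 means C(i,j) has been computed at least once.  After the
// column is finished, rows still at +1 become zombies when WITH_ZOMBIES is
// defined, or are simply dropped otherwise, and every touched flag is reset
// to 0 so the workspace can be reused for the next column.  The standalone
// sketch below walks through the three states; the array sizes and values
// are made up.

#include <stdio.h>
#include <stdint.h>

int main (void)
{
    int8_t  Flag [6] = { 0, 0, 0, 0, 0, 0 } ;
    int64_t mask_rows [3] = { 1, 3, 4 } ;       // pattern of Mask(:,j)
    int64_t computed_rows [2] = { 3, 5 } ;      // rows produced by A*B(:,j)

    // scatter the mask: mark candidate rows
    for (int p = 0 ; p < 3 ; p++) Flag [mask_rows [p]] = 1 ;

    // numeric phase: rows with Flag == 0 are skipped; the first write to a
    // marked row flips its flag to -1
    for (int p = 0 ; p < 2 ; p++)
    {
        int64_t i = computed_rows [p] ;
        if (Flag [i] == 0) continue ;           // row 5 is outside the mask
        Flag [i] = -1 ;                         // row 3 becomes a live entry
    }

    // gather phase: -1 is a live entry, +1 is a zombie; reset flags to 0
    for (int p = 0 ; p < 3 ; p++)
    {
        int64_t i = mask_rows [p] ;
        printf ("row %lld: %s\n", (long long) i,
            (Flag [i] < 0) ? "live entry" : "zombie") ;
        Flag [i] = 0 ;
    }
    return (0) ;
}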
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint32.c new file mode 100644 index 0000000000..2cdf600c80 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_uint32 +// A'*B function: GB_AdotB__min_islt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint64.c new file mode 100644 index 0000000000..ed73b2f5c8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_uint64 +// A'*B function: GB_AdotB__min_islt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint8.c new file mode 100644 index 0000000000..9d76b92b8a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_islt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_islt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_islt_uint8 +// A'*B function: GB_AdotB__min_islt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_fp32.c new file mode 100644 index 0000000000..b0951b086d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_fp32 +// A'*B function: GB_AdotB__min_isne_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik != bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki != bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_fp64.c new file mode 100644 index 0000000000..05a7c3f4ce --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_fp64 +// A'*B function: GB_AdotB__min_isne_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik != bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki != bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int16.c new file mode 100644 index 0000000000..cb2e87a934 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_int16 +// A'*B function: GB_AdotB__min_isne_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int32.c new file mode 100644 index 0000000000..4777d5d0ee --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_int32 +// A'*B function: GB_AdotB__min_isne_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int64.c new file mode 100644 index 0000000000..e8897f3568 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_int64 +// A'*B function: GB_AdotB__min_isne_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int8.c new file mode 100644 index 0000000000..2291dea709 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_int8 +// A'*B function: GB_AdotB__min_isne_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint16.c new file mode 100644 index 0000000000..7141433147 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_uint16 +// A'*B function: GB_AdotB__min_isne_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
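+    // Semantics of the MIN_ISNE semiring used below: the IS_NE multiply gives
+    // t = 1 when the paired entries of A and B differ and t = 0 when they are
+    // equal, and the MIN monoid (identity UINT16_MAX) keeps the smallest t.
+    // Each computed C(i,j) is therefore 0 if any matched pair of entries is
+    // equal, and 1 if every matched pair differs.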
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint32.c new file mode 100644 index 0000000000..2bfb887c2e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_uint32 +// A'*B function: GB_AdotB__min_isne_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
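+    // Semantics of the MIN_ISNE semiring used below: the IS_NE multiply gives
+    // t = 1 when the paired entries of A and B differ and t = 0 when they are
+    // equal, and the MIN monoid (identity UINT32_MAX) keeps the smallest t.
+    // Each computed C(i,j) is therefore 0 if any matched pair of entries is
+    // equal, and 1 if every matched pair differs.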
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint64.c new file mode 100644 index 0000000000..0ac25fca79 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_uint64 +// A'*B function: GB_AdotB__min_isne_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
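+    // Semantics of the MIN_ISNE semiring used below: the IS_NE multiply gives
+    // t = 1 when the paired entries of A and B differ and t = 0 when they are
+    // equal, and the MIN monoid (identity UINT64_MAX) keeps the smallest t.
+    // Each computed C(i,j) is therefore 0 if any matched pair of entries is
+    // equal, and 1 if every matched pair differs.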
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } 
+ + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if 
(cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint8.c new file mode 100644 index 0000000000..6662a6011e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_isne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_isne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_isne_uint8 +// A'*B function: GB_AdotB__min_isne_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
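+    // Semantics of the MIN_ISNE semiring used below: the IS_NE multiply gives
+    // t = 1 when the paired entries of A and B differ and t = 0 when they are
+    // equal, and the MIN monoid (identity UINT8_MAX) keeps the smallest t.
+    // Each computed C(i,j) is therefore 0 if any matched pair of entries is
+    // equal, and 1 if every matched pair differs.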
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx 
[cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_fp32.c new file mode 100644 index 0000000000..a0f9f46865 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_fp32 +// A'*B function: GB_AdotB__min_land_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
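+    // Semantics of the MIN_LAND semiring used below: the LAND multiply gives
+    // t = 1 when both paired entries of A and B are nonzero and t = 0
+    // otherwise, and the FMIN monoid (identity INFINITY) keeps the smallest t.
+    // Each computed C(i,j) is therefore 0 if any matched pair has a zero
+    // operand, and 1 if all matched pairs are nonzero.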
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_fp64.c new file mode 100644 index 0000000000..323dc1683d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_fp64 +// A'*B function: GB_AdotB__min_land_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_int16.c new file mode 100644 index 0000000000..47e2e05999 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_int16 +// A'*B function: GB_AdotB__min_land_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_int32.c new file mode 100644 index 0000000000..49878f0707 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_int32 +// A'*B function: GB_AdotB__min_land_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_int64.c new file mode 100644 index 0000000000..55c268fba4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_int64 +// A'*B function: GB_AdotB__min_land_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_int8.c new file mode 100644 index 0000000000..cf28970355 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_int8 +// A'*B function: GB_AdotB__min_land_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
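+    // [Editorial note, not part of the generated template] In this MIN_LAND
+    // semiring the "multiply" t = ((aik != 0) && (bkj != 0)) is always 0 or
+    // 1, and the "add" keeps the minimum.  Illustrative example: if A(i,:)
+    // meets B(:,j) at k1 and k2 with A(i,k1)=5, B(k1,j)=0, A(i,k2)=3, and
+    // B(k2,j)=7, then t is 0 at k1 and 1 at k2, so w [i] ends as
+    // IMIN (0,1) = 0.  The identity INT8_MAX never changes a result, since
+    // IMIN (t, INT8_MAX) == t for t in {0,1}.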
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint16.c new file mode 100644 index 0000000000..c96b087c8f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_uint16 +// A'*B function: GB_AdotB__min_land_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
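+    // [Editorial note, not part of the generated template] The numeric
+    // values of A and B matter only through (value != 0), so this uint16_t
+    // kernel effectively computes a boolean result.  Illustrative example:
+    // with A(i,k1)=2, B(k1,j)=3 and A(i,k2)=4, B(k2,j)=9, both t terms are
+    // 1 and w [i] = IMIN (1,1) = 1; a single zero-valued entry in either
+    // factor drops w [i] to 0.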
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint32.c new file mode 100644 index 0000000000..e1c7dc5145 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_uint32 +// A'*B function: GB_AdotB__min_land_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
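+    // [Editorial note, not part of the generated template] Here cij is the
+    // minimum over k of ((aik != 0) && (bkj != 0)): C(i,j) is 1 only when
+    // every k at which A(i,k) and B(k,j) are both present pairs two nonzero
+    // values, and 0 otherwise.  UINT32_MAX acts as the additive identity
+    // because IMIN (t, UINT32_MAX) == t for t in {0,1}.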
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint64.c new file mode 100644 index 0000000000..713b3fd9fa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_uint64 +// A'*B function: GB_AdotB__min_land_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
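+    // [Editorial note, not part of the generated template] As in the other
+    // MIN_LAND kernels, the multiply yields only 0 or 1, so despite the
+    // uint64_t type the accumulated values stay in {0,1}; UINT64_MAX is
+    // used only as the identity (and, in the unmasked case below, to clear
+    // the workspace w before each column is computed).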
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_land_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint8.c new file mode 100644 index 0000000000..f5a08d37f3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_land_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_land_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_land_uint8 +// A'*B function: GB_AdotB__min_land_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
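+    // [Editorial note, not part of the generated template] A compact worked
+    // example of the add step: starting from the identity, cij = UINT8_MAX,
+    // folding in t = 1 gives IMIN (UINT8_MAX, 1) = 1, and a later t = 0
+    // gives IMIN (1, 0) = 0, matching the "Identity" and "Add" lines in the
+    // header comment of this file.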
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_fp32.c new file mode 100644 index 0000000000..aa85f5f659 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_fp32 +// A'*B function: GB_AdotB__min_lor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_fp64.c new file mode 100644 index 0000000000..c5b3a161c0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_fp64 +// A'*B function: GB_AdotB__min_lor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
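+ // [editorial note, not part of the generated kernel] All of the GB_AxB__*
+ // routines in these generated files use the same column-at-a-time outer
+ // product.  A condensed sketch of the unmasked case for one column j, with
+ // the fp64 MIN_LOR operators and the variable names used below:
+ //
+ //     for (p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = INFINITY ;   // clear
+ //     for (p = Bp [j] ; p < Bp [j+1] ; p++)            // each B(k,j)
+ //     {
+ //         k = Bi [p] ; bkj = Bx [p] ;
+ //         for (pa = Ap [k] ; pa < Ap [k+1] ; pa++)     // each A(i,k)
+ //         {
+ //             i = Ai [pa] ;
+ //             t = (Ax [pa] != 0) || (bkj != 0) ;       // multiply
+ //             w [i] = FMIN (w [i], t) ;                // add into w
+ //         }
+ //     }
+ //     for (p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;     // gather
+ //
+ // The masked case (Mask != NULL) follows the same outline but first scatters
+ // Mask(:,j) into Flag and only updates w [i] where Flag [i] is nonzero.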
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int16.c new file mode 100644 index 0000000000..308e050c3c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_int16 +// A'*B function: GB_AdotB__min_lor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
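+ // [editorial note, not part of the generated kernel] The companion
+ // GB_AdotB__* routine near the end of each of these files computes C=A'*B
+ // one dot product cij = A(:,i)'*B(:,j) at a time, and picks a merge strategy
+ // from the entry counts ainz of A(:,i) and bjnz of B(:,j):
+ //
+ //     both dense            : one loop over all nrows positions
+ //     one dense, one sparse : walk the sparse pattern, index the dense vector
+ //     ainz > 32 * bjnz      : skip ahead in A(:,i) with GB_BINARY_TRIM_SEARCH
+ //     bjnz > 32 * ainz      : skip ahead in B(:,j) with GB_BINARY_TRIM_SEARCH
+ //     otherwise             : plain two-pointer linear merge
+ //
+ // For example, with ainz = 10000 and bjnz = 4 the kernel performs at most a
+ // handful of binary searches in A(:,i) rather than scanning all 10000
+ // entries.  The factor 32 is the heuristic hard-coded in the tests below.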
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int32.c new file mode 100644 index 0000000000..6f923ef898 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_int32 +// A'*B function: GB_AdotB__min_lor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
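+ // [editorial note, not part of the generated kernel] These kernels contain
+ // two variants of the masked outer product, selected by the WITH_ZOMBIES
+ // compile-time switch below.  With zombies enabled, C inherits the Mask's
+ // pattern up front (Maskp is copied into C->p), and any Mask entry that
+ // receives no contribution from A*B is kept as a zombie:
+ //
+ //     Cx [p] = INT32_MAX ;      // identity value used as a placeholder
+ //     Ci [p] = FLIP (i) ;       // flipped row index marks the zombie
+ //     C->nzombies++ ;
+ //
+ // and C is then placed on the queue via GB_queue_insert (C).  Without
+ // WITH_ZOMBIES, only live entries are gathered compactly and Cp [j] is
+ // rebuilt column by column as cnz advances.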
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int64.c new file mode 100644 index 0000000000..9ed72e47b3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_int64 +// A'*B function: GB_AdotB__min_lor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
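As the semiring comment block above spells out, every kernel in this family shares one scalar algebra: the "multiply" tests whether either operand is nonzero, the "add" is integer MIN, and the additive identity is the type's maximum value, so IMIN (cij, identity) leaves cij unchanged. A short standalone illustration of those three pieces, with made-up values and independent of the generated kernel:

    #include <stdio.h>
    #include <stdint.h>

    #define IMIN(a,b) (((a) < (b)) ? (a) : (b))

    int main (void)
    {
        // MIN_LOR semiring over int64_t:
        //   multiply:  t   = (aki != 0) || (bkj != 0)    (result is 0 or 1)
        //   add:       cij = IMIN (cij, t)
        //   identity:  INT64_MAX, since IMIN (cij, INT64_MAX) == cij
        int64_t a [ ] = { 0, 4, 0, -2 } ;     // A(:,i) as a dense vector
        int64_t b [ ] = { 0, 0, 7,  5 } ;     // B(:,j) as a dense vector

        int64_t cij = INT64_MAX ;             // start at the additive identity
        for (int k = 0 ; k < 4 ; k++)
        {
            int64_t t = (a [k] != 0) || (b [k] != 0) ;
            cij = IMIN (cij, t) ;
        }
        // the products are t = 0, 1, 1, 1 so the MIN over all of them is 0
        printf ("cij = %lld\n", (long long) cij) ;    // prints cij = 0
        return (0) ;
    }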
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
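In the masked branch, C inherits the entire pattern of the Mask up front; Mask entries that receive no contribution from A*B become zombies, with their row index negated by FLIP and counted in C->nzombies so a later phase can prune them (which is why GB_queue_insert places C in the queue below). A small sketch of a self-inverse flip with that flavor; the flip used here is only illustrative, and the library's real FLIP macro is defined in GB.h:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    // an illustrative self-inverse flip: valid row indices are >= 0, flipped
    // ("zombie") indices are < 0, and flipping twice recovers the original index
    static int64_t flip (int64_t i) { return (-i - 2) ; }
    static bool is_zombie (int64_t i) { return (i < 0) ; }

    int main (void)
    {
        int64_t Ci [ ] = { 0, flip (1), 3, flip (4) } ;   // a pattern with 2 zombies
        int64_t nzombies = 0 ;
        for (int p = 0 ; p < 4 ; p++)
        {
            if (is_zombie (Ci [p]))
            {
                nzombies++ ;
                printf ("entry %d is a zombie for row %lld\n",
                    p, (long long) flip (Ci [p])) ;       // flip is its own inverse
            }
            else
            {
                printf ("entry %d is live in row %lld\n", p, (long long) Ci [p]) ;
            }
        }
        printf ("nzombies = %lld\n", (long long) nzombies) ;   // prints 2
        return (0) ;
    }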
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int8.c new file mode 100644 index 0000000000..ac217b19f3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_int8 +// A'*B function: GB_AdotB__min_lor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
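The dot-product kernels above choose a strategy per C(i,j): both vectors dense, one vector dense, roughly balanced lists (a plain two-pointer merge), or badly imbalanced lists, where one side is more than 32 times longer and the merge gallops through the long side with a trimmed binary search instead of stepping entry by entry. A compact standalone sketch of that last case; binary_trim_search below is an illustrative stand-in for the GB_BINARY_TRIM_SEARCH macro:

    #include <stdio.h>
    #include <stdint.h>

    // find the smallest position p in X [pleft..pright-1] with X [p] >= target,
    // assuming X is sorted ascending (a stand-in for GB_BINARY_TRIM_SEARCH,
    // which trims the search range in place)
    static int64_t binary_trim_search (int64_t target, const int64_t *X,
        int64_t pleft, int64_t pright)
    {
        while (pleft < pright)
        {
            int64_t pmiddle = (pleft + pright) / 2 ;
            if (X [pmiddle] < target) pleft = pmiddle + 1 ;
            else pright = pmiddle ;
        }
        return (pleft) ;
    }

    int main (void)
    {
        // row indices of A(:,i) (long) and B(:,j) (short), both sorted
        int64_t Ai [ ] = { 0, 1, 2, 3, 5, 7, 8, 9, 12, 15, 20, 31 } ;
        int64_t Bi [ ] = { 5, 12, 40 } ;
        int64_t pa = 0, pa_end = 12, pb = 0, pb_end = 3 ;

        while (pa < pa_end && pb < pb_end)
        {
            int64_t ia = Ai [pa], ib = Bi [pb] ;
            if (ia < ib)
            {
                // skip ahead in the long list rather than advancing one entry at a time
                pa = binary_trim_search (ib, Ai, pa + 1, pa_end) ;
            }
            else if (ib < ia)
            {
                pb++ ;
            }
            else
            {
                printf ("match at row %lld\n", (long long) ia) ;  // rows 5 and 12
                pa++ ; pb++ ;
            }
        }
        return (0) ;
    }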
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
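In the unmasked else branch of these kernels, the pattern of C has already been computed by GB_AxB_symbolic, so the workspace w is never cleared wholesale: for each column j, only the positions in C(:,j)'s pattern are set to the additive identity, products are folded in with IMIN, and the same positions are gathered back out. A small sketch of that per-column identity initialization, with illustrative data:

    #include <stdio.h>
    #include <stdint.h>

    #define IMIN(a,b) (((a) < (b)) ? (a) : (b))

    int main (void)
    {
        // pattern of C(:,j) as computed by the prior symbolic phase: rows 1 and 3
        int64_t Ci [ ] = { 1, 3 } ;
        int64_t cjnz = 2 ;
        int8_t  w [4] ;                           // dense workspace, left uninitialized

        // set only the positions C(:,j) uses to the additive identity
        for (int64_t p = 0 ; p < cjnz ; p++) w [Ci [p]] = INT8_MAX ;

        // fold in a few products t = (aik != 0) || (bkj != 0)
        w [1] = IMIN (w [1], (int8_t) 1) ;        // some operand nonzero
        w [3] = IMIN (w [3], (int8_t) 0) ;        // both operands zero
        w [3] = IMIN (w [3], (int8_t) 1) ;        // a later product; MIN keeps the 0

        // gather C(:,j) back out of w
        int8_t Cx [2] ;
        for (int64_t p = 0 ; p < cjnz ; p++) Cx [p] = w [Ci [p]] ;
        printf ("C(1,j) = %d, C(3,j) = %d\n", (int) Cx [0], (int) Cx [1]) ;   // 1 and 0
        return (0) ;
    }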
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint16.c new file mode 100644 index 0000000000..d332ca27b4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_uint16 +// A'*B function: GB_AdotB__min_lor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
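All of these kernels traverse their operands in compressed sparse column form: for a matrix with n columns, the pointer array (Ap, Bp, Cp, Maskp) has n+1 entries and column j occupies positions Ap [j] through Ap [j+1]-1 of the row-index and value arrays. A minimal illustration of that layout and traversal, with made-up data:

    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
        // a 4-by-3 matrix in compressed sparse column (CSC) form:
        //   column 0 holds rows 0 and 2, column 1 is empty, column 2 holds rows 1 and 3
        int64_t  Ap [ ] = { 0, 2, 2, 4 } ;       // n+1 column pointers
        int64_t  Ai [ ] = { 0, 2, 1, 3 } ;       // row indices, sorted within each column
        uint16_t Ax [ ] = { 10, 0, 7, 3 } ;      // values (explicit zeros are allowed)
        int64_t  n = 3 ;

        for (int64_t j = 0 ; j < n ; j++)
        {
            for (int64_t p = Ap [j] ; p < Ap [j+1] ; p++)
            {
                printf ("A(%lld,%lld) = %u\n",
                    (long long) Ai [p], (long long) j, (unsigned) Ax [p]) ;
            }
        }
        return (0) ;
    }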
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint32.c new file mode 100644 index 0000000000..bac693bd49 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_uint32 +// A'*B function: GB_AdotB__min_lor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
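Whenever a Mask is present, these kernels ask GB_cast_factory for a function pointer that converts Mask(i,j) from its stored type to boolean before testing it. The following is a tiny self-contained sketch of that dispatch style; cast_to_bool, TypeCode, and cast_factory are hypothetical names standing in for the library's GB_cast_function / GB_cast_factory machinery, whose exact signatures live in GB.h:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    // hypothetical stand-ins for the library's type codes and cast functions
    typedef enum { CODE_BOOL, CODE_INT32, CODE_FP64 } TypeCode ;
    typedef void (*cast_to_bool) (bool *out, const void *in) ;

    static void cast_bool  (bool *out, const void *in) { *out = *((const bool    *) in) ; }
    static void cast_int32 (bool *out, const void *in) { *out = (*((const int32_t *) in) != 0) ; }
    static void cast_fp64  (bool *out, const void *in) { *out = (*((const double  *) in) != 0) ; }

    // pick a caster by type code, in the spirit of GB_cast_factory
    static cast_to_bool cast_factory (TypeCode code)
    {
        switch (code)
        {
            case CODE_INT32 : return (cast_int32) ;
            case CODE_FP64  : return (cast_fp64)  ;
            default         : return (cast_bool)  ;
        }
    }

    int main (void)
    {
        double maskx = 0.0 ;                      // a Mask entry stored as double
        cast_to_bool cast = cast_factory (CODE_FP64) ;
        bool mij ;
        cast (&mij, &maskx) ;
        printf ("Mask entry is %s\n", mij ? "true" : "false") ;   // prints false
        return (0) ;
    }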
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint64.c new file mode 100644 index 0000000000..b580109f63 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_uint64 +// A'*B function: GB_AdotB__min_lor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
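The dot-product kernels also special-case dense vectors: when A(:,i) has an entry in every row (ainz == nrows), A(k,i) is read directly as Ax [pa + k] while B(:,j) is scanned sparsely, and symmetrically when B(:,j) is dense, avoiding the merge altogether. A small sketch of the "A dense, B sparse" case under the same MIN_LOR algebra; the array names and values are illustrative:

    #include <stdio.h>
    #include <stdint.h>

    #define IMIN(a,b) (((a) < (b)) ? (a) : (b))

    int main (void)
    {
        // A(:,i) is dense over 5 rows; B(:,j) is sparse with entries in rows 1 and 4
        uint64_t Ax [ ] = { 0, 6, 0, 9, 0 } ;   // A(0..4,i), starting at position 0
        int64_t  Bi [ ] = { 1, 4 } ;
        uint64_t Bx [ ] = { 0, 8 } ;
        int64_t  bjnz = 2 ;

        uint64_t cij = UINT64_MAX ;             // additive identity of MIN
        for (int64_t pb = 0 ; pb < bjnz ; pb++)
        {
            int64_t k = Bi [pb] ;               // only rows where B(k,j) exists
            uint64_t aki = Ax [k] ;             // direct lookup, since A(:,i) is dense
            uint64_t t = (aki != 0) || (Bx [pb] != 0) ;
            cij = IMIN (cij, t) ;
        }
        // k=1: t = (6!=0)||(0!=0) = 1 ; k=4: t = (0!=0)||(8!=0) = 1 ; so cij = 1
        printf ("cij = %llu\n", (unsigned long long) cij) ;
        return (0) ;
    }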
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint8.c new file mode 100644 index 0000000000..93fd80d2e0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lor_uint8 +// A'*B function: GB_AdotB__min_lor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
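+    // Note on the masked branch below (added for exposition, not generated
+    // code): when a Mask is present, C takes the pattern of the Mask.  Under
+    // the WITH_ZOMBIES compile-time option, Maskp is copied into C->p up
+    // front, and any Mask position that receives no contribution from A*B
+    // becomes a zombie: its value is set to the identity (UINT8_MAX here),
+    // its row index is stored as FLIP (i), C->nzombies is incremented, and C
+    // is placed on the queue via GB_queue_insert so the pending zombies can
+    // be dealt with later.  Without WITH_ZOMBIES, only live entries are
+    // appended, and Cp is built one column at a time as cnz grows.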
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp32.c new file mode 100644 index 0000000000..983bac1b92 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_fp32 +// A'*B function: GB_AdotB__min_lxor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
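+    // Semiring sketch (expository comment only): MIN_LXOR over float treats
+    // each operand as the boolean "is nonzero" and multiplies with
+    // exclusive-or:
+    //
+    //      t   = (aik != 0) != (bkj != 0) ;  // 1.0f iff exactly one nonzero
+    //      cij = FMIN (cij, t) ;             // MIN monoid, identity INFINITY
+    //
+    // For example, aik = 3.5 with bkj = 0 gives t = 1 ; aik = 3.5 with
+    // bkj = -2 gives t = 0, which then pins the running minimum at 0.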
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp64.c new file mode 100644 index 0000000000..2fd22d7204 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_fp64 +// A'*B function: GB_AdotB__min_lxor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
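+    // Note on GB_AdotB__min_lxor_fp64 later in this file (added for
+    // clarity): the dot-product method forms C=A'*B one entry at a time,
+    // cij = A(:,i)'*B(:,j), and picks a strategy per column pair.  If both
+    // columns are dense it streams all nrows terms; if only one is dense it
+    // walks the sparse one and indexes the dense one directly; if one column
+    // has more than 32 times the entries of the other it advances through
+    // the longer index list with GB_BINARY_TRIM_SEARCH, a binary search that
+    // trims away the skipped range; otherwise it uses a plain two-pointer
+    // merge, applying the MERGE macro whenever the two row indices match.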
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int16.c new file mode 100644 index 0000000000..c72ad7d2b8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_int16 +// A'*B function: GB_AdotB__min_lxor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
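+    // Note on the unmasked branch below (expository only): when no Mask is
+    // given, the pattern of C has already been computed by GB_AxB_symbolic,
+    // so each column C(:,j) is produced in three steps: clear w to the
+    // identity (INT16_MAX) at the positions listed in Ci, accumulate every
+    // A(:,k)*B(k,j) term into w, then gather w back into Cx in the order
+    // given by Ci.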
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int32.c new file mode 100644 index 0000000000..39c32502b6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_int32 +// A'*B function: GB_AdotB__min_lxor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
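+ // [Editorial annotation; not part of the auto-generated template.]
+ // A note on the semiring for readers of this generated kernel: the
+ // "multiply" t = (aik != 0) != (bkj != 0) is the logical XOR of the two
+ // entries' nonzero status, so t is always 0 or 1, and the MIN "add"
+ // therefore acts as a logical AND over all products contributing to a
+ // given C(i,j).  The identity INT32_MAX appears only when no product is
+ // computed for that position, in which case C(i,j) is not created (or
+ // becomes a zombie in the masked case below).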
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int64.c new file mode 100644 index 0000000000..023c6ec937 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_int64 +// A'*B function: GB_AdotB__min_lxor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
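+ // [Editorial annotation; not part of the auto-generated template.]
+ // Sketch of the masked path that follows, as read from this generated
+ // code: when a Mask is present and WITH_ZOMBIES is defined, Maskp is
+ // copied directly into C->p, so C takes the Mask's pattern up front.
+ // Mask positions that receive no product from A*B are kept as "zombies":
+ // their row indices are stored FLIP'ed (as negative values) and counted
+ // in C->nzombies, to be pruned later by the library's pending-work
+ // mechanism.  Without WITH_ZOMBIES, only live entries are appended and
+ // Cp is built incrementally.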
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int8.c new file mode 100644 index 0000000000..c78f7e88f7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_int8 +// A'*B function: GB_AdotB__min_lxor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
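+ // [Editorial annotation; not part of the auto-generated template.]
+ // In the unmasked case further below, the pattern of C has already been
+ // computed by GB_AxB_symbolic, so for each column j the workspace
+ // entries w [i] are first cleared to the additive identity (INT8_MAX
+ // here), the products for column j are accumulated into w with IMIN,
+ // and the finished values are then gathered back into Cx in a single
+ // pass over the column's pattern.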
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + 
} + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint16.c new file mode 100644 index 0000000000..628b85d409 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_uint16 +// A'*B function: GB_AdotB__min_lxor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
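+ // [Editorial annotation; not part of the auto-generated template.]
+ // For the companion dot-product kernel (GB_AdotB__min_lxor_uint16,
+ // later in this file): when computing cij = A(:,i)'*B(:,j), the two
+ // sparse columns are merged with a two-pointer scan, but if one column
+ // has more than 32 times as many entries as the other, the scan of the
+ // denser column is advanced with GB_BINARY_TRIM_SEARCH (a trimmed
+ // binary search) rather than one entry at a time; fully dense columns
+ // get their own unconditional loops.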
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint32.c new file mode 100644 index 0000000000..6d0c2925aa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_uint32 +// A'*B function: GB_AdotB__min_lxor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint64.c new file mode 100644 index 0000000000..0d700f36a9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_uint64 +// A'*B function: GB_AdotB__min_lxor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
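+    // Note: the semiring arithmetic here is the same as in the uint32 kernel,
+    // only the type differs: each product t = ((aik != 0) != (bkj != 0)) is
+    // 0 or 1, the sum IMIN (cij,t) drops to 0 as soon as any term is 0, and
+    // UINT64_MAX serves as the additive identity.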
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + 
#endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, 
pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint8.c new file mode 100644 index 0000000000..949c992560 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_lxor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_lxor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_lxor_uint8 +// A'*B function: GB_AdotB__min_lxor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
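+    // Note: each product t = ((aik != 0) != (bkj != 0)) is 0 or 1, so this
+    // uint8_t kernel cannot overflow; the MIN accumulator yields 1 only if
+    // every contributing product is 1, and 0 as soon as one product is 0.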
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + 
+ } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if 
(!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next 
entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_fp32.c new file mode 100644 index 0000000000..7864d05478 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_fp32 +// A'*B function: GB_AdotB__min_max_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
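+    // Note: this MIN_MAX semiring takes t = FMAX(aik,bkj) as the multiply and
+    // FMIN (cij,t) as the add, with INFINITY as the identity.  For example,
+    // aik = 2 and bkj = 5 give t = 5, and FMIN (INFINITY,5) = 5 ; a later
+    // pair whose max is 3 lowers the result to 3.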
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_fp64.c new file mode 100644 index 0000000000..3a11ab1d76 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_fp64 +// A'*B function: GB_AdotB__min_max_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
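+    // Note: same MIN_MAX semiring as the fp32 kernel, in double precision:
+    // t = FMAX(aik,bkj) is the multiply, FMIN (cij,t) the add, and INFINITY
+    // the identity that leaves cij unchanged under FMIN.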
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_int16.c new file mode 100644 index 0000000000..52522d7815 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_int16 +// A'*B function: GB_AdotB__min_max_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
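+    // How this kernel works: C=A*B is computed one column at a time in a
+    // saxpy / Gustavson-style sweep.  For each entry B(k,j), every entry
+    // A(i,k) contributes t = IMAX (A(i,k), B(k,j)), and t is folded into
+    // the dense workspace with w [i] = IMIN (w [i], t).  The workspace is
+    // then gathered into C(:,j), either through the Mask pattern (masked
+    // case below) or through the pattern precomputed by GB_AxB_symbolic
+    // (unmasked case).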
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
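+        // Finalize the masked result.  When WITH_ZOMBIES is defined, C->p
+        // was copied from Maskp before the loop, so C has exactly the
+        // Mask's pattern; any C(i,j) in the Mask that received no product
+        // from A*B is a zombie: its value is the identity (INT16_MAX), its
+        // row index is stored as FLIP (i), and C->nzombies counts it so
+        // later operations can prune it.  When WITH_ZOMBIES is not defined,
+        // the pattern is compacted as it is built and Cp [n] = cnz closes
+        // the column pointers.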
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_int32.c new file mode 100644 index 0000000000..227c00e8f6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_int32 +// A'*B function: GB_AdotB__min_max_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
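+    // Mask handling in this kernel: scatter_mask expands Mask(:,j) into the
+    // Flag workspace, so Flag [i] != 0 means Mask(i,j) is true.  A positive
+    // Flag [i] means C(i,j) has not yet been computed; once w [i] holds a
+    // value, Flag [i] is set to -1.  Columns with an empty Mask(:,j), and
+    // columns A(:,k) whose entries fall outside the Mask's row range, are
+    // skipped entirely.  The gather phase resets each touched Flag entry to
+    // zero so the workspace is clear for the next column.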
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_int64.c new file mode 100644 index 0000000000..42dcb9a76d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_int64 +// A'*B function: GB_AdotB__min_max_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
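+    // In the unmasked case below, the pattern of C has already been
+    // computed by GB_AxB_symbolic, so for each column j the workspace
+    // entries covering the pattern of C(:,j) are first set to the additive
+    // identity (INT64_MAX), the products are accumulated with IMIN, and the
+    // result is gathered back into Cx.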
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_int8.c new file mode 100644 index 0000000000..d9d814dfa1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_int8 +// A'*B function: GB_AdotB__min_max_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
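+    // Note: the flip argument is not referenced by this kernel.  The IMAX
+    // multiply operator is commutative, so the result is the same whether
+    // or not the caller swapped A and B.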
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint16.c new file mode 100644 index 0000000000..3cd0c7f33d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_uint16 +// A'*B function: GB_AdotB__min_max_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
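+    // Workspace: Work and Flag come from GB_thread_local and are provided
+    // by the caller.  Work holds one entry per row of C and need not be
+    // initialized; Flag must be all zero on entry, and the gather phase
+    // restores every entry it touches to zero so the buffer can be reused
+    // for the next column and the next call.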
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + 
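+            // C(:,j) is complete: with zombies its pattern matches Mask(:,j),
+            // otherwise only the live entries were appended at position cnz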
} + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, 
Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } 
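+            // the traversal of A(:,i) and B(:,j) is finished; cij holds the
+            // IMIN-reduction of the IMAX(aki,bkj) terms if any entry was found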
+ + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint32.c new file mode 100644 index 0000000000..b72197f02a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_uint32 +// A'*B function: GB_AdotB__min_max_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + 
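+            // C(:,j) is complete: with zombies its pattern matches Mask(:,j),
+            // otherwise only the live entries were appended at position cnz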
} + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, 
Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } 
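+            // the traversal of A(:,i) and B(:,j) is finished; cij holds the
+            // IMIN-reduction of the IMAX(aki,bkj) terms if any entry was found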
+ + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint64.c new file mode 100644 index 0000000000..d413e78634 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_uint64 +// A'*B function: GB_AdotB__min_max_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + 
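+            // C(:,j) is complete: with zombies its pattern matches Mask(:,j),
+            // otherwise only the live entries were appended at position cnz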
} + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, 
Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } 
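+            // the traversal of A(:,i) and B(:,j) is finished; cij holds the
+            // IMIN-reduction of the IMAX(aki,bkj) terms if any entry was found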
+ + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_max_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint8.c new file mode 100644 index 0000000000..2cb071f6a4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_max_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_max_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_max_uint8 +// A'*B function: GB_AdotB__min_max_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
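+        // all columns of C have been computed; queue C if it was built with
+        // zombies, otherwise log the total entry count in Cp [n]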
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_fp32.c new file mode 100644 index 0000000000..9fc2039919 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_fp32 +// A'*B function: GB_AdotB__min_min_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (FMIN(aik,bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_fp64.c new file mode 100644 index 0000000000..43d24c220e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_fp64 +// A'*B function: GB_AdotB__min_min_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (FMIN(aik,bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
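+    // Illustrative example (hypothetical values, not taken from the generated
+    // code): under the MIN_MIN semiring each entry is
+    // C(i,j) = min over k of min (A(i,k), B(k,j)), and the identity INFINITY
+    // leaves a partial result unchanged.  If column j of B has B(0,j) = 2 and
+    // B(2,j) = 5, and A has A(1,0) = 3 and A(1,2) = 1, the loops below produce
+    // w [1] = min (min (3,2), min (1,5)) = 1, which is gathered as C(1,j) = 1.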
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_int16.c new file mode 100644 index 0000000000..6245775f2c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_int16 +// A'*B function: GB_AdotB__min_min_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
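+    // In the masked variant below, Flag acts as a per-row tri-state for the
+    // current column j: 0 means Mask(i,j) is not set and the entry is skipped,
+    // a positive value means Mask(i,j) is set but no C(i,j) has been computed
+    // yet, and -1 means w [i] already holds a partial result to be combined
+    // with IMIN.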
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
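+    // When WITH_ZOMBIES is defined, Mask entries that received no contribution
+    // from A*B were kept above as zombies: their value is the identity
+    // INT16_MAX, their row index is negated with FLIP, and C->nzombies is
+    // incremented; GB_queue_insert below then records C so those entries can
+    // be cleaned up later.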
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_int32.c new file mode 100644 index 0000000000..7df70bd418 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_int32 +// A'*B function: GB_AdotB__min_min_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
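+    // Two cases follow: if a Mask is present it defines the pattern of C and
+    // the result may contain zombies; otherwise the pattern of C was already
+    // computed by GB_AxB_symbolic, and w is cleared to the identity INT32_MAX
+    // one column at a time before C(:,j) is gathered from it.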
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_int64.c new file mode 100644 index 0000000000..20339d49cd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_int64 +// A'*B function: GB_AdotB__min_min_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
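+    // The outer-product method works one column of B at a time: for each entry
+    // B(k,j) it folds column A(:,k) into the workspace w under the semiring
+    // (t = IMIN (aik,bkj), then w [i] = IMIN (w [i],t)), and finally gathers w
+    // into C(:,j), so w only needs one slot per row of C.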
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_int8.c new file mode 100644 index 0000000000..7e10822633 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_int8 +// A'*B function: GB_AdotB__min_min_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
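+    // A sketch of how this kernel is typically reached (assumption: the
+    // predefined GxB_MIN_MIN_INT8 semiring and int8 matrices C, A, and B have
+    // already been created; this call is not part of the generated file):
+    //
+    //      GrB_Info info = GrB_mxm (C, NULL, NULL, GxB_MIN_MIN_INT8,
+    //                               A, B, NULL) ;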
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, 
Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + 
Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint16.c new file mode 100644 index 0000000000..e708fe7af5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_uint16 +// A'*B function: GB_AdotB__min_min_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + 
} + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, 
Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } 
+ + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint32.c new file mode 100644 index 0000000000..d1244b7885 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_uint32 +// A'*B function: GB_AdotB__min_min_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + 
} + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, 
Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } 
+ + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint64.c new file mode 100644 index 0000000000..9bd4dcd99d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_uint64 +// A'*B function: GB_AdotB__min_min_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + 
} + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, 
Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } 
+ + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_min_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint8.c new file mode 100644 index 0000000000..c9e90fdc93 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_min_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_min_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_min_uint8 +// A'*B function: GB_AdotB__min_min_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_fp32.c new file mode 100644 index 0000000000..124dcf1fca --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_fp32 +// A'*B function: GB_AdotB__min_minus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
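The comment block above pins down the MIN-MINUS-FP32 semiring used by this generated kernel: the "multiply" is subtraction, t = aik - bkj (or bkj - aik when flip is set), the "add" is FMIN, and +INFINITY is the additive identity. A minimal standalone sketch of that update rule on two dense vectors follows; it is illustrative only, is not part of the generated file, and the helper name dot_min_minus_fp32 is invented for the example.

#include <math.h>
#include <stdbool.h>
#include <stdio.h>

/* illustrative only: the MIN-MINUS-FP32 update, starting from the MIN identity
   +INFINITY, with subtraction as the "multiply" and fminf as the "add" */
static float dot_min_minus_fp32 (const float *a, const float *b, int n, bool flip)
{
    float cij = INFINITY ;
    for (int k = 0 ; k < n ; k++)
    {
        float t = flip ? (b [k] - a [k]) : (a [k] - b [k]) ;
        cij = fminf (cij, t) ;
    }
    return (cij) ;
}

int main (void)
{
    float a [3] = { 4, 7, 1 } ;
    float b [3] = { 2, 9, 5 } ;
    // min (4-2, 7-9, 1-5) = -4
    printf ("cij = %g\n", dot_min_minus_fp32 (a, b, 3, false)) ;
    return (0) ;
}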
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_fp64.c new file mode 100644 index 0000000000..ceda1fea65 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_fp64: hard-coded C=A*B 
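The A'*B (dot product) kernel that ends just above computes each C(i,j) by merging the sorted row indices of A(:,i) and B(:,j); in the "about the same sparsity" branch this is a plain two-pointer intersection, with the MERGE macro applied only where a row index occurs in both columns. The standalone sketch below shows that merge for the min-minus semiring; it is illustrative only (no mask, no dense-column special cases, no 32x skip heuristic), and dot_sparse_min_minus is a made-up name.

#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* illustrative two-pointer merge over two sorted index lists (Ai,Ax) and (Bi,Bx):
   cij = min over the shared row indices of (aki - bkj); returns true only if the
   two lists intersect, mirroring the cij_exists flag in the kernel above */
static bool dot_sparse_min_minus (const int64_t *Ai, const float *Ax, int64_t anz,
                                  const int64_t *Bi, const float *Bx, int64_t bnz,
                                  float *cij)
{
    bool cij_exists = false ;
    float c = INFINITY ;
    int64_t pa = 0, pb = 0 ;
    while (pa < anz && pb < bnz)
    {
        if      (Ai [pa] < Bi [pb]) pa++ ;      // A(ia,i) has no match in B(:,j)
        else if (Bi [pb] < Ai [pa]) pb++ ;      // B(ib,j) has no match in A(:,i)
        else
        {
            float t = Ax [pa++] - Bx [pb++] ;   // "multiply": aki - bkj
            c = fminf (c, t) ;                  // "add": running minimum
            cij_exists = true ;
        }
    }
    (*cij) = c ;
    return (cij_exists) ;
}

int main (void)
{
    int64_t Ai [3] = { 0, 2, 5 } ;  float Ax [3] = { 3, 8, 1 } ;
    int64_t Bi [3] = { 2, 3, 5 } ;  float Bx [3] = { 6, 4, 0 } ;
    float cij ;
    // shared indices 2 and 5: min (8-6, 1-0) = 1
    if (dot_sparse_min_minus (Ai, Ax, 3, Bi, Bx, 3, &cij)) printf ("cij = %g\n", cij) ;
    return (0) ;
}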
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_fp64 +// A'*B function: GB_AdotB__min_minus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) 
into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int16.c new file mode 100644 index 0000000000..7cdcfb3a0f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_int16: hard-coded C=A*B 
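In the two unbalanced branches of the dot-product kernels above (ainz > 32 * bjnz and its mirror image), the shorter column drives the merge and the longer column is not scanned entry by entry: GB_BINARY_TRIM_SEARCH jumps pa (or pb) forward past every index that can no longer match. That macro is defined elsewhere in the library and its exact contract is not shown in this patch, so the sketch below substitutes an ordinary lower-bound binary search purely to illustrate the skip.

#include <stdint.h>
#include <stdio.h>

/* first position p in [lo,hi) with X [p] >= target, where X is sorted ascending
   (a stand-in for the skip done by GB_BINARY_TRIM_SEARCH, whose real semantics
   may differ) */
static int64_t lower_bound (const int64_t *X, int64_t lo, int64_t hi, int64_t target)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (X [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return (lo) ;
}

int main (void)
{
    // Ai: long sorted index list of A(:,i); ib: next row index wanted from B(:,j)
    int64_t Ai [8] = { 1, 4, 6, 9, 12, 15, 20, 33 } ;
    int64_t pa = 0, pa_end = 8, ib = 14 ;
    pa = lower_bound (Ai, pa + 1, pa_end, ib) ;   // discard Ai entries below ib
    printf ("pa advances to %lld, Ai [pa] = %lld\n", (long long) pa, (long long) Ai [pa]) ;
    return (0) ;
}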
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_int16 +// A'*B function: GB_AdotB__min_minus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int32.c new file mode 100644 index 0000000000..c6fe00eeea --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_int32: 
hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_int32 +// A'*B function: GB_AdotB__min_minus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue 
; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int64.c new file mode 100644 index 0000000000..eb6265bd9b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_int64: 
hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_int64 +// A'*B function: GB_AdotB__min_minus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue 
; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int8.c new file mode 100644 index 0000000000..1a7bcf2f6a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_int8: hard-coded 
C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_int8 +// A'*B function: GB_AdotB__min_minus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter 
Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint16.c new file mode 100644 index 0000000000..7dc5a27477 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_uint16: hard-coded 
C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_uint16 +// A'*B function: GB_AdotB__min_minus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue 
; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint32.c new file mode 100644 index 0000000000..3de3a12d3b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_minus_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_uint32 +// A'*B function: GB_AdotB__min_minus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + 
if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint64.c new file mode 100644 index 0000000000..e6652672b0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// 
GB_AxB__min_minus_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_uint64 +// A'*B function: GB_AdotB__min_minus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + 
if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint8.c new file mode 100644 index 0000000000..cb6e2dca05 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_minus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_minus_uint8: 
hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_minus_uint8 +// A'*B function: GB_AdotB__min_minus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue 
; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_fp32.c new file mode 100644 index 0000000000..e979ab974b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_fp32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_fp32 +// A'*B function: GB_AdotB__min_plus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik + bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + 
const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki + bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH 
(ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_fp64.c new file mode 100644 index 0000000000..bdee4e16cd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. 
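+
+// As a point of reference (a minimal sketch only; the helper name and the
+// dense column-major layout are assumptions, not part of this file), the
+// min-plus semiring used below computes C(i,j) = min over k of
+// (A(i,k) + B(k,j)), with +infinity as the additive identity:
+//
+//      #include <math.h>
+//      #include <stdint.h>
+//      // dense n-by-n reference for C = A*B over the (min,+) semiring
+//      static void min_plus_reference (double *C, const double *A,
+//                                      const double *B, int64_t n)
+//      {
+//          for (int64_t j = 0 ; j < n ; j++)
+//          {
+//              for (int64_t i = 0 ; i < n ; i++)
+//              {
+//                  double cij = INFINITY ;             // additive identity
+//                  for (int64_t k = 0 ; k < n ; k++)
+//                  {
+//                      double t = A [i + k*n] + B [k + j*n] ;  // "multiply"
+//                      cij = fmin (cij, t) ;                   // "add"
+//                  }
+//                  C [i + j*n] = cij ;
+//              }
+//          }
+//      }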
+ +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_fp64 +// A'*B function: GB_AdotB__min_plus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + 
// C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik + bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx 
[pb++] ; /* bjk = B(k,j) */ \ + double t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki + bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + 
//-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int16.c new file mode 100644 index 0000000000..c2528235a6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_int16 +// A'*B function: GB_AdotB__min_plus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
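+    // w is a gather/scatter workspace with one entry per row of C: for each
+    // column j, every entry B(k,j) combines with A(:,k) under the semiring
+    // (t = aik + bkj) and is folded into w [i] via IMIN; w is then gathered
+    // back into C(:,j), whose pattern is either the Mask or the pattern
+    // computed by GB_AxB_symbolic.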
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int32.c new file mode 100644 index 0000000000..8428472054 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_int32 +// A'*B function: GB_AdotB__min_plus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
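+    // With INT32_MAX as the additive identity, folding terms into a running
+    // minimum leaves a value unchanged until a genuine term arrives: for
+    // example, starting from cij = INT32_MAX, the terms 3+4 and 2+1 give
+    // IMIN (INT32_MAX, 7) = 7 and then IMIN (7, 3) = 3.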
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int64.c new file mode 100644 index 0000000000..d595a9079d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_int64 +// A'*B function: GB_AdotB__min_plus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
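// Aside (editorial sketch, not generated code): the unmasked branch of these
// kernels accumulates one column of C at a time into the dense workspace w,
// in the style of Gustavson's algorithm: w is cleared to the semiring
// identity, each present B(k,j) scatters A(:,k) "*" B(k,j) into w with IMIN,
// and w is then gathered into C(:,j).  The standalone toy below shows that
// pattern for the min-plus/int64 case; the matrices, sizes, and values are
// made up for illustration and are not part of this patch.

#include <stdio.h>
#include <stdint.h>

#define IDENTITY INT64_MAX
#define IMIN(x,y) (((x) < (y)) ? (x) : (y))

int main (void)
{
    // a 3-by-2 matrix A and one column B(:,j), in compressed-column form
    int64_t Ap [ ] = { 0, 2, 3 } ;          // column pointers of A
    int64_t Ai [ ] = { 0, 2, 1 } ;          // row indices of A
    int64_t Ax [ ] = { 1, 4, 2 } ;          // values of A
    int64_t Bi [ ] = { 0, 1 } ;             // row indices of B(:,j)
    int64_t Bx [ ] = { 5, 3 } ;             // values of B(:,j)
    int64_t bjnz = 2, nrows = 3 ;

    int64_t w [3] = { IDENTITY, IDENTITY, IDENTITY } ;  // clear the workspace
    for (int64_t p = 0 ; p < bjnz ; p++)
    {
        int64_t k = Bi [p] ;                // B(k,j) is present
        int64_t bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;           // w [i] "+=" A(i,k) "*" B(k,j)
            w [i] = IMIN (w [i], Ax [pa] + bkj) ;
        }
    }
    for (int64_t i = 0 ; i < nrows ; i++)
    {
        printf ("C(%lld,j) = %lld\n", (long long) i, (long long) w [i]) ;
    }
    return (0) ;
}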
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int8.c new file mode 100644 index 0000000000..5102b922ca --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_int8 +// A'*B function: GB_AdotB__min_plus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
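// Aside (editorial sketch, not generated code): the scalar behavior that all
// of these GB_AxB__min_plus_* kernels hard-code.  "Multiply" is integer
// addition, "add" is IMIN, and the additive identity is the type's maximum
// value, so cij = IMIN (cij, INT8_MAX) leaves cij unchanged.  The tiny
// standalone program below, with made-up operand values, is illustrative
// only and is not part of this patch.

#include <stdio.h>
#include <stdint.h>

static int8_t minplus_multiply (int8_t aik, int8_t bkj)
{
    return ((int8_t) (aik + bkj)) ;         // semiring "multiply" is plus
}

static int8_t minplus_add (int8_t cij, int8_t t)
{
    return ((cij < t) ? cij : t) ;          // semiring "add" is IMIN
}

int main (void)
{
    int8_t cij = INT8_MAX ;                             // additive identity
    cij = minplus_add (cij, minplus_multiply (3, 4)) ;  // cij is now 7
    cij = minplus_add (cij, minplus_multiply (1, 9)) ;  // still 7 (10 loses)
    printf ("cij = %d\n", (int) cij) ;
    return (0) ;
}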
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint16.c new file mode 100644 index 0000000000..a87b81caf3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_uint16 +// A'*B function: GB_AdotB__min_plus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
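// Aside (editorial sketch, not generated code): the masked loop above relies
// on a three-state Flag workspace.  After scatter_mask, Flag [i] is 1 for
// every row i present in Mask(:,j) and 0 otherwise; the first accepted update
// of C(i,j) flips Flag [i] to -1 so later updates know w [i] already holds a
// value.  The standalone toy below, with made-up rows and values, shows only
// that convention and is not part of this patch.

#include <stdio.h>
#include <stdint.h>

#define IMIN(x,y) (((x) < (y)) ? (x) : (y))

int main (void)
{
    int8_t   Flag [4] = { 0, 0, 0, 0 } ;
    uint16_t w [4] ;
    int64_t  Maski [ ] = { 1, 3 } ;         // Mask(:,j) has rows 1 and 3

    // scatter Mask(:,j) into Flag (what scatter_mask does once per column)
    for (int64_t p = 0 ; p < 2 ; p++) Flag [Maski [p]] = 1 ;

    // three updates arrive; row 0 is dropped because it is not in the Mask
    int64_t  rows [ ] = { 3, 0, 3 } ;
    uint16_t vals [ ] = { 9, 2, 5 } ;
    for (int64_t p = 0 ; p < 3 ; p++)
    {
        int64_t i = rows [p] ;
        if (Flag [i] == 0) continue ;       // Mask(i,j) not present: skip
        if (Flag [i] > 0)
        {
            Flag [i] = -1 ;                 // first time C(i,j) is seen
            w [i] = vals [p] ;
        }
        else
        {
            w [i] = IMIN (w [i], vals [p]) ;    // later updates fold in
        }
    }
    printf ("w[3] = %u, Flag[1] = %d\n", (unsigned) w [3], (int) Flag [1]) ;
    return (0) ;
}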
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint32.c new file mode 100644 index 0000000000..4bc5c3de2e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_uint32 +// A'*B function: GB_AdotB__min_plus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
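// Aside (editorial sketch, not generated code): when WITH_ZOMBIES is defined,
// C takes the full Mask pattern, a Mask entry that receives no contribution
// from A*B is kept as a "zombie" whose row index is stored in a flipped
// (negative) form and counted in C->nzombies, and GB_queue_insert registers C
// so the zombies can be pruned later.  The flip below is only a stand-in for
// the library's FLIP macro; any involution mapping valid indices to negative
// values illustrates the idea.  Arrays and values are made up.

#include <stdio.h>
#include <stdint.h>

static int64_t flip (int64_t i) { return (-i - 2) ; }   // flip (flip (i)) == i

int main (void)
{
    int64_t Ci [3] ;
    int64_t nzombies = 0 ;
    int64_t mask_rows [ ] = { 0, 2, 5 } ;   // pattern of Mask(:,j) = pattern of C(:,j)
    int     live      [ ] = { 1, 0, 1 } ;   // did A*B produce a value here?

    for (int p = 0 ; p < 3 ; p++)
    {
        if (live [p])
        {
            Ci [p] = mask_rows [p] ;        // live entry keeps its row index
        }
        else
        {
            Ci [p] = flip (mask_rows [p]) ; // zombie: flipped row index
            nzombies++ ;
        }
    }
    printf ("Ci = %lld %lld %lld, nzombies = %lld\n",
        (long long) Ci [0], (long long) Ci [1], (long long) Ci [2],
        (long long) nzombies) ;
    // flipping again recovers the original row of the zombie entry:
    printf ("unflipped zombie row = %lld\n", (long long) flip (Ci [1])) ;
    return (0) ;
}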
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint64.c new file mode 100644 index 0000000000..6bdd0df87d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_uint64 +// A'*B function: GB_AdotB__min_plus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
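        // Note on the WITH_ZOMBIES variant above (illustration only): C
        // keeps the full pattern of the Mask, and a Mask entry that gets no
        // contribution from A*B is stored as a zombie -- its value is set
        // to the identity UINT64_MAX, its row index is encoded with
        // FLIP (i) (defined elsewhere in GB.h), and C->nzombies is
        // incremented so the entry can be pruned later.  For example, if
        // Mask(:,j) has rows {0,2,5} but A*B(:,j) only produces row 2, then
        // rows 0 and 5 become zombies and nzombies increases by 2.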
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + 
// C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint8.c new file mode 100644 index 0000000000..cb926fee37 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_plus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_plus_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_plus_uint8 +// A'*B function: GB_AdotB__min_plus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
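    // Worked example for the uint8_t case (illustration only): the Multiply
    // step t = aik + bkj is stored into a uint8_t, so it wraps modulo 256
    // under C's unsigned conversion rules.  With aik = 200 and bkj = 100
    // the sum 300 becomes t = 44, and the Add step takes cij = IMIN (cij,44).
    // UINT8_MAX (255) remains the identity, since IMIN (cij,255) never
    // changes any uint8_t value of cij.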
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_fp32.c new file mode 100644 index 0000000000..4cc2abac46 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_fp32 +// A'*B function: GB_AdotB__min_second_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
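    // Worked example of the MIN_SECOND pair documented above (illustration
    // only): the Multiply step ignores the A value, t = bkj, so aik is read
    // below but does not affect the result.  Starting from the identity
    // cij = INFINITY, entries with bkj = 3.5 and bkj = 2.0 give
    // cij = FMIN (INFINITY,3.5) = 3.5 and then FMIN (3.5,2.0) = 2.0: cij is
    // simply the smallest B(k,j) among the terms that pair with an entry
    // of A.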
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; 
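    // The column pointers follow the usual compressed-column convention:
    // column j of C occupies positions Cp [j] ... Cp [j+1]-1 of Ci and Cx,
    // and the final Cp [n] = cnz above closes the last column.  For
    // example (illustration only), a 3-column result with 2, 0, and 1
    // entries has Cp = { 0, 2, 2, 3 } and cnz = 3.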
+} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_fp64.c new file mode 100644 index 0000000000..d87dbdc103 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_fp64 +// A'*B function: GB_AdotB__min_second_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
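    // Note on the workspace w used below (illustration only): w is a dense
    // accumulator of length C->nrows, but each column touches only the
    // positions in the pattern of C(:,j).  In the unmasked branch those
    // positions are first cleared to the identity INFINITY, updated with
    // FMIN while B(:,j) is scanned, and then gathered into Cx, so w never
    // needs a full clear between columns.  For a column whose pattern is
    // rows {1,4}, only w [1] and w [4] are written and read.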
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_int16.c new file mode 100644 index 0000000000..428064557c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_int16 +// A'*B function: GB_AdotB__min_second_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
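    // Note on the Flag workspace used in the masked branch below: Flag
    // holds one of three states per row i of the current column j, as set
    // by scatter_mask and the updates that follow.  Flag [i] == 0 means
    // Mask(i,j) is not present, so the update is skipped; a positive value
    // means Mask(i,j) is present but C(i,j) has not been seen yet, so
    // w [i] is initialized to t; and Flag [i] == -1 means C(i,j) already
    // exists, so w [i] is updated with IMIN.  The gather step resets Flag
    // to zero so it can be reused for the next column.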
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_int32.c new file mode 100644 index 0000000000..3321f5f009 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_int32 +// A'*B function: GB_AdotB__min_second_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
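Read on its own, the (MIN, SECOND) semiring that this family of kernels hard-codes reduces each entry to C(i,j) = min over k of B(k,j), taken over the k where both A(i,k) and B(k,j) are present, since SECOND ignores its first argument. The standalone snippet below only illustrates that scalar recurrence, starting from the identity INT32_MAX, with made-up values; it is not part of the generated file.

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative only: scalar recurrence of the (MIN, SECOND) semiring */
    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    int main (void)
    {
        /* hypothetical values B(k,j) for the k's where A(i,k) and B(k,j)
           are both present */
        int32_t bkj_values [3] = { 7, 2, 5 } ;

        int32_t cij = INT32_MAX ;           /* additive identity of MIN */
        for (int k = 0 ; k < 3 ; k++)
        {
            int32_t t = bkj_values [k] ;    /* SECOND(aik,bkj) = bkj */
            cij = IMIN (cij, t) ;           /* MIN monoid */
        }
        printf ("C(i,j) = %d\n", cij) ;     /* prints 2 */
        return (0) ;
    }

The same recurrence appears in the kernel below, with w [i] playing the role of cij when a whole column is computed at once.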
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
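The sparsity case analysis above ultimately intersects the sorted row lists of A(:,i) and B(:,j); when one list is more than 32 times longer than the other, the plain two-pointer merge is replaced by a binary trim search that jumps ahead in the longer list. The sketch below shows that idea with plain arrays and a hand-rolled lower-bound search; it is a simplification with hypothetical data, not the library's GB_BINARY_TRIM_SEARCH macro.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdbool.h>

    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    /* first position p in ix[lo..hi-1] with ix[p] >= target (illustrative) */
    static int64_t trim_search (const int64_t *ix, int64_t lo, int64_t hi,
        int64_t target)
    {
        while (lo < hi)
        {
            int64_t mid = lo + (hi - lo) / 2 ;
            if (ix [mid] < target) lo = mid + 1 ; else hi = mid ;
        }
        return (lo) ;
    }

    int main (void)
    {
        /* hypothetical sorted row indices of A(:,i) and B(:,j), and B's values */
        int64_t Ai [8] = { 0, 1, 3, 4, 6, 8, 9, 11 } ;
        int64_t Bi [2] = { 4, 9 } ;
        int32_t Bx [2] = { 7, 5 } ;

        int64_t pa = 0, pb = 0 ;
        bool cij_exists = false ;
        int32_t cij = INT32_MAX ;

        while (pa < 8 && pb < 2)
        {
            if (Ai [pa] < Bi [pb])
            {
                /* B(:,j) is much sparser: jump pa past all rows < Bi [pb] */
                pa = trim_search (Ai, pa + 1, 8, Bi [pb]) ;
            }
            else if (Bi [pb] < Ai [pa])
            {
                pb++ ;
            }
            else
            {
                /* rows match: SECOND multiply, MIN add */
                cij_exists = true ;
                cij = IMIN (cij, Bx [pb]) ;
                pa++ ; pb++ ;
            }
        }
        if (cij_exists) printf ("cij = %d\n", cij) ;    /* prints 5 */
        return (0) ;
    }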
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_int64.c new file mode 100644 index 0000000000..a9ff32374c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_int64 +// A'*B function: GB_AdotB__min_second_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
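In the masked outer-product branch, the Flag workspace acts as a per-row tri-state: 0 means Mask(i,j) is absent and the term is skipped, 1 means Mask(i,j) is present but no product has landed on C(i,j) yet, and -1 means w [i] already holds a partial result. A minimal single-column sketch of that scatter, accumulate, gather cycle follows, with hypothetical data and without the zombie bookkeeping.

    #include <stdint.h>
    #include <stdio.h>
    #include <inttypes.h>

    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    int main (void)
    {
        /* one column j, 5 rows; hypothetical mask pattern and product terms */
        int64_t mask_rows [3] = { 0, 2, 4 } ;       /* rows where Mask(i,j)=1 */
        int64_t prod_rows [4] = { 0, 1, 2, 2 } ;    /* rows i of A(i,k)*B(k,j) terms */
        int64_t prod_vals [4] = { 9, 3, 6, 4 } ;    /* the B(k,j) values of those terms */

        int8_t  Flag [5] = { 0 } ;                  /* 0: not in the mask */
        int64_t w [5] ;                             /* workspace, left uninitialized */

        /* scatter the mask: 1 means "in mask, not yet seen in C" */
        for (int p = 0 ; p < 3 ; p++) Flag [mask_rows [p]] = 1 ;

        /* accumulate the products, respecting the mask */
        for (int p = 0 ; p < 4 ; p++)
        {
            int64_t i = prod_rows [p], t = prod_vals [p] ;
            if (Flag [i] == 0) continue ;                       /* not in the mask */
            if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = t ; }   /* first term */
            else              { w [i] = IMIN (w [i], t) ; }     /* later terms */
        }

        /* gather C(:,j) from the mask pattern and reset the flags */
        for (int p = 0 ; p < 3 ; p++)
        {
            int64_t i = mask_rows [p] ;
            if (Flag [i] < 0)
            {
                printf ("C(%" PRId64 ",j) = %" PRId64 "\n", i, w [i]) ;
            }
            Flag [i] = 0 ;
        }
        return (0) ;
    }

Row 4 is in the mask but never receives a product; in the sketch it is simply dropped, whereas the kernel's zombie variant keeps it as a placeholder.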
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_int8.c new file mode 100644 index 0000000000..d64cadc197 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_int8 +// A'*B function: GB_AdotB__min_second_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
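In the unmasked branch the pattern Cp/Ci has already been produced by GB_AxB_symbolic, so the numeric phase only fills Cx: seed w with the identity on exactly the rows listed for column j, fold every product term into w with the MIN monoid, then gather w back along the same row list. A condensed single-column version of that three-step pass, using hypothetical int8_t data as in this file, is:

    #include <stdint.h>
    #include <stdio.h>

    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    int main (void)
    {
        /* hypothetical pattern of C(:,j) from the symbolic phase: rows 1 and 3 */
        int64_t Ci [2] = { 1, 3 } ;
        int8_t  Cx [2] ;

        /* hypothetical product terms A(i,k)*B(k,j) falling in column j */
        int64_t rows [3] = { 1, 3, 1 } ;
        int8_t  bkj  [3] = { 5, 2, 3 } ;    /* SECOND keeps only B(k,j) */

        int8_t w [4] ;

        /* 1: clear w on the pattern of C(:,j) only */
        for (int p = 0 ; p < 2 ; p++) w [Ci [p]] = INT8_MAX ;

        /* 2: accumulate with the MIN monoid */
        for (int p = 0 ; p < 3 ; p++) w [rows [p]] = IMIN (w [rows [p]], bkj [p]) ;

        /* 3: gather the column */
        for (int p = 0 ; p < 2 ; p++) Cx [p] = w [Ci [p]] ;

        printf ("C(1,j)=%d  C(3,j)=%d\n", Cx [0], Cx [1]) ;   /* prints 3 and 2 */
        return (0) ;
    }

Clearing w only on the column's pattern is enough because the symbolic phase guarantees that every row receiving a product term already appears in Ci for that column.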
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last 
column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint16.c new file mode 100644 index 0000000000..c5ea5a66e2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_uint16 +// A'*B function: GB_AdotB__min_second_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
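One more detail of the dot-product variants: when A(:,i) has an entry in every row (ainz == nrows) there is nothing to search, so the row index taken from the sparse vector indexes straight into the dense one as Ax [pa + k]. A stripped-down sketch of that one-side-dense dot product, with hypothetical uint16_t data and pa fixed at 0, is:

    #include <stdint.h>
    #include <stdio.h>

    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    int main (void)
    {
        /* A(:,i) dense over 6 rows; B(:,j) sparse with 3 entries */
        uint16_t Ax [6] = { 10, 20, 30, 40, 50, 60 } ;
        int64_t  Bi [3] = { 1, 4, 5 } ;
        uint16_t Bx [3] = { 8, 3, 9 } ;

        uint16_t cij = UINT16_MAX ;         /* identity of the MIN monoid */
        for (int pb = 0 ; pb < 3 ; pb++)
        {
            int64_t k = Bi [pb] ;           /* row of B(k,j) */
            uint16_t aki = Ax [k] ;         /* direct lookup: A(:,i) is dense */
            (void) aki ;                    /* SECOND ignores A's value */
            cij = IMIN (cij, Bx [pb]) ;
        }
        printf ("cij = %u\n", (unsigned) cij) ;   /* prints 3 */
        return (0) ;
    }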
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = 
cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint32.c new file mode 100644 index 0000000000..ee7bcdff08 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_uint32 +// A'*B function: GB_AdotB__min_second_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
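The WITH_ZOMBIES variant copies the Mask's pattern directly into C->p, so every Mask entry needs a slot in C even when no product lands on it; those placeholders (zombies) carry a flipped, negative row index and are counted in C->nzombies so that a later assembly step can prune them. The sketch below assumes the FLIP(i) = -(i)-2 encoding used elsewhere in this library (the definitive macro lives in GB.h); it only demonstrates that the encoding is its own inverse.

    #include <stdint.h>
    #include <stdio.h>

    /* assumed encoding of zombie row indices; see FLIP/UNFLIP in GB.h */
    #define FLIP(i)      (-(i)-2)
    #define UNFLIP(i)    (((i) < 0) ? FLIP (i) : (i))
    #define IS_ZOMBIE(i) ((i) < 0)

    int main (void)
    {
        int64_t i = 7 ;
        int64_t z = FLIP (i) ;              /* -9: row 7 marked as a zombie */
        printf ("zombie=%d  row=%lld\n", IS_ZOMBIE (z), (long long) UNFLIP (z)) ;
        /* FLIP is its own inverse, so live (>= 0) and zombie (< 0) indices
           can share the same index array and be recovered later */
        return (0) ;
    }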
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = 
cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint64.c new file mode 100644 index 0000000000..0c425f7ae9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_uint64 +// A'*B function: GB_AdotB__min_second_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + 
#ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, 
msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = 
cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_second_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint8.c new file mode 100644 index 0000000000..e146b153e3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_second_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_second_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_second_uint8 +// A'*B function: GB_AdotB__min_second_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } 
+ } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_fp32.c new file mode 100644 index 0000000000..5908434b7a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_fp32 +// A'*B function: GB_AdotB__min_times_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik * bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki * bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_fp64.c new file mode 100644 index 0000000000..72b47d17bd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_fp64 +// A'*B function: GB_AdotB__min_times_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: INFINITY (where cij = FMIN (cij,INFINITY) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = FMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = FMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INFINITY ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INFINITY ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik * bkj ; + w [i] = FMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = FMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki * bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INFINITY ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij = FMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_int16.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_int16.c new file mode 100644 index 0000000000..e6ff11b881 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_int16 +// A'*B function: GB_AdotB__min_times_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: INT16_MAX (where cij = IMIN (cij,INT16_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
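+ // Worked example of the MIN-TIMES semiring applied below (illustrative
+ // values only): if the products contributing to C(i,j) are 2*4 = 8 and
+ // 3*5 = 15, the accumulator starts at the identity INT16_MAX and the
+ // monoid folds each product in:  cij = IMIN (INT16_MAX, 8) = 8, then
+ // cij = IMIN (8, 15) = 8, so C(i,j) = 8, the smallest product.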
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_int32.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_int32.c new file mode 100644 index 0000000000..7802bce63b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_int32 +// A'*B function: GB_AdotB__min_times_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: INT32_MAX (where cij = IMIN (cij,INT32_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
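+ // In the masked phase below, Flag [i] records the state of row i for the
+ // current column j: 0 means Mask(i,j) is not set and the entry is skipped;
+ // a positive value means Mask(i,j) is set but C(i,j) has not been seen, so
+ // the first product is stored in w [i] and Flag [i] is flipped to -1; and
+ // -1 means C(i,j) already exists, so each later product t is folded in
+ // with w [i] = IMIN (w [i], t).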
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_int64.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_int64.c new file mode 100644 index 0000000000..4a85c184c7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_int64 +// A'*B function: GB_AdotB__min_times_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: INT64_MAX (where cij = IMIN (cij,INT64_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
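+ // When WITH_ZOMBIES is defined, the masked phase below copies the Mask
+ // pattern directly into C->p.  Mask entries that receive no contribution
+ // from A*B become zombies: their value is set to the identity, their row
+ // index is stored as FLIP (i), and C->nzombies is incremented.
+ // GB_queue_insert then queues C so the zombies can be removed later,
+ // rather than compacting the pattern inside this kernel.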
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_int8.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_int8.c new file mode 100644 index 0000000000..e54c6c1ffc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_int8 +// A'*B function: GB_AdotB__min_times_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: INT8_MAX (where cij = IMIN (cij,INT8_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
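+ // In the unmasked phase below, the pattern of C has already been computed
+ // by GB_AxB_symbolic.  For each column j the workspace is first set to the
+ // identity (w [Ci [p]] = INT8_MAX) at exactly the positions in C(:,j),
+ // then updated with w [i] = IMIN (w [i], aik*bkj) for every product, and
+ // finally gathered back into Cx in pattern order.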
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = INT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = INT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = INT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i 
; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint16.c new file mode 100644 index 0000000000..af23667bf3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_uint16 +// A'*B function: GB_AdotB__min_times_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: UINT16_MAX (where cij = IMIN (cij,UINT16_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
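+ // The companion dot-product kernel GB_AdotB__min_times_uint16 later in
+ // this file chooses a strategy for each C(i,j) from the sparsity of A(:,i)
+ // and B(:,j): both dense, one dense and one sparse, one more than 32 times
+ // sparser than the other (a binary trim search skips runs of unmatched
+ // indices), or a plain two-pointer merge.  In each case the MERGE macro
+ // folds every matching product into cij with IMIN, creating cij on the
+ // first match.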
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT16_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT16_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT16_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) 
+ { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint32.c new file mode 100644 index 0000000000..dbe7c62535 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_uint32 +// A'*B function: GB_AdotB__min_times_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: UINT32_MAX (where cij = IMIN (cij,UINT32_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
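+    // [editorial sketch, not generated code] This kernel is the saxpy-style,
+    // column-at-a-time form of C=A*B for the MIN-TIMES semiring on uint32:
+    // "multiply" is integer *, "add" is IMIN, and the additive identity is
+    // UINT32_MAX (taking the min with UINT32_MAX changes nothing), exactly as
+    // listed in the header above.  A minimal standalone sketch of one C(i,j),
+    // with hypothetical arrays a and b holding the nk matching entries of
+    // A(i,:) and B(:,j):
+    //
+    //      uint32_t cij = UINT32_MAX ;           /* identity of IMIN       */
+    //      for (int64_t k = 0 ; k < nk ; k++)
+    //      {
+    //          uint32_t t = a [k] * b [k] ;      /* multiply: aik * bkj    */
+    //          cij = IMIN (cij, t) ;             /* add: running minimum   */
+    //      }
+    //
+    // The workspace w below plays the role of cij for a whole column C(:,j).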
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT32_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
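+        // [editorial note, not generated code] When zombies were created
+        // above, C is left with pending work; GB_queue_insert below is
+        // presumably how this version records that fact, placing C on the
+        // global list of matrices whose pending deletions are finished later
+        // (for example by GB_wait), in keeping with the non-blocking model.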
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT32_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT32_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) 
+ { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint64.c new file mode 100644 index 0000000000..0932a881b4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_uint64 +// A'*B function: GB_AdotB__min_times_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: UINT64_MAX (where cij = IMIN (cij,UINT64_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
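+    // [editorial note, not generated code] The comment above explains why w
+    // may start uninitialized: every slot of w that is ever read is written
+    // first.  In the masked branch, the first hit on row i stores into w [i]
+    // (Flag [i] goes from positive to -1); in the unmasked branch, the
+    // pattern of C(:,j) is already known from GB_AxB_symbolic, so w is
+    // cleared to the additive identity (UINT64_MAX here) over exactly that
+    // pattern before accumulating, as in:
+    //
+    //      for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++)
+    //          w [Ci [p]] = UINT64_MAX ;         /* clear only C(:,j)'s rows */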
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT64_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + 
+ #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT64_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, 
Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT64_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) 
+ { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__min_times_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint8.c new file mode 100644 index 0000000000..da4233a877 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__min_times_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__min_times_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__min_times_uint8 +// A'*B function: GB_AdotB__min_times_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: UINT8_MAX (where cij = IMIN (cij,UINT8_MAX) does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij = IMIN (cij,t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__min_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
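+    // [editorial note, not generated code] In the masked branch below, Flag
+    // is a byte-per-row scratch array encoding three states for the current
+    // column j:
+    //
+    //      Flag [i] == 0 :  i is not in Mask(:,j), so the term is skipped
+    //      Flag [i]  > 0 :  i is in Mask(:,j) but C(i,j) has no value yet
+    //      Flag [i] == -1:  C(i,j) already holds a partial result in w [i]
+    //
+    // scatter_mask fills Flag lazily, only for columns that actually do any
+    // work, and the gather loops reset the touched entries back to 0 so the
+    // array is clear again for the next column.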
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] = IMIN (w [i],t) ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = UINT8_MAX ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = UINT8_MAX ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + w [i] = IMIN (w [i],t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__min_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij = IMIN (cij,t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + 
Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = UINT8_MAX ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij = IMIN (cij,t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij 
; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_fp32.c new file mode 100644 index 0000000000..519e196af0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_fp32 +// A'*B function: GB_AdotB__plus_div_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
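+    // [editorial sketch, not generated code] Unlike TIMES above, DIV is not
+    // commutative, so the flip argument matters here: when the caller has
+    // swapped A and B, the multiply evaluates bkj/aik instead of aik/bkj,
+    // which restores the quotient the user asked for.  A minimal sketch with
+    // hypothetical scalars a and b:
+    //
+    //      float a = 6, b = 3 ;                     /* want a/b == 2         */
+    //      float aik = b, bkj = a ;                 /* caller swapped A, B   */
+    //      bool  flip = true ;
+    //      float t = flip ? (bkj/aik) : (aik/bkj) ; /* t == a/b == 2         */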
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_fp64.c new file mode 100644 index 0000000000..18b6a2698a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_fp64: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_fp64 +// A'*B function: GB_AdotB__plus_div_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask 
(j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int16.c new file mode 100644 index 0000000000..e2d0aa77b7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_int16: hard-coded C=A*B 
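+//------------------------------------------------------------------------------
+
+// Note on the multiplicative operator: for the integer PLUS_DIV semirings the
+// multiply is IDIV, a guarded integer division defined in GB.h, rather than the
+// raw C "/" operator; one motivation is to keep a zero divisor inside the
+// kernel from invoking undefined behavior.  The sketch below is illustrative
+// only: the name example_idiv_int16 is hypothetical and the zero-divisor
+// convention shown here is an assumption, not the authoritative definition,
+// which remains the IDIV macro in GB.h.
+//
+//      #include <stdint.h>
+//
+//      static inline int16_t example_idiv_int16 (int16_t x, int16_t y)
+//      {
+//          if (y == 0)
+//          {
+//              // assumed convention: return 0 for 0/0, otherwise saturate
+//              return ((x == 0) ? 0 : ((x < 0) ? INT16_MIN : INT16_MAX)) ;
+//          }
+//          if (x == INT16_MIN && y == -1)
+//          {
+//              // the exact quotient (32768) does not fit in int16_t; clamp
+//              return (INT16_MAX) ;
+//          }
+//          return ((int16_t) (x / y)) ;
+//      }
+//
+// With t = IDIV(aik,bkj) (or IDIV(bkj,aik) when flip is true), the PLUS monoid
+// of the semiring then simply accumulates cij += t, as in the fp64 kernel above.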
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_int16 +// A'*B function: GB_AdotB__plus_div_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int32.c new file mode 100644 index 0000000000..2d3b6931ac --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_int32: hard-coded C=A*B 
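+//------------------------------------------------------------------------------
+
+// The masked branch of this kernel computes one column of C at a time with a
+// gather/scatter (saxpy-style) method: Mask(:,j) is scattered into the byte
+// array Flag, each B(k,j) contributes A(:,k)*B(k,j) into the dense workspace w
+// only at rows where Flag is set, and the column is then gathered back out of
+// w.  The real kernel never clears w up front; the sign of Flag [i] records
+// whether C(i,j) has been touched yet.  The sketch below shows the same idea
+// in a simplified, self-contained form: all names are hypothetical, the
+// workspace is assumed pre-cleared, and a plain multiply stands in for IDIV.
+//
+//      #include <stdint.h>
+//
+//      // C(:,j) = (A * B(:,j)) .* Mask(:,j), one column, simplified.
+//      // A is CSC (Ap, Ai, Ax); B(:,j) is given as a list of bjnz entries.
+//      void example_masked_column
+//      (
+//          const int64_t *Ap, const int64_t *Ai, const int32_t *Ax,
+//          const int64_t *Bj_rows, const int32_t *Bj_vals, int64_t bjnz,
+//          const int8_t *Flag,      // Flag [i] != 0 iff Mask (i,j) is present
+//          int32_t *w               // dense workspace, size nrows, all zero
+//      )
+//      {
+//          for (int64_t p = 0 ; p < bjnz ; p++)
+//          {
+//              int64_t k   = Bj_rows [p] ;          // B(k,j) is present
+//              int32_t bkj = Bj_vals [p] ;
+//              for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
+//              {
+//                  int64_t i = Ai [pa] ;
+//                  if (Flag [i] == 0) continue ;    // Mask(i,j) not present
+//                  w [i] += Ax [pa] * bkj ;         // semiring multiply-add
+//              }
+//          }
+//          // the kernel then gathers w back into Cx/Ci using the Mask pattern
+//      }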
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_int32 +// A'*B function: GB_AdotB__plus_div_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int64.c new file mode 100644 index 0000000000..d2e242356f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_int64: hard-coded C=A*B 
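+//------------------------------------------------------------------------------
+
+// The GB_AdotB kernel forms each C(i,j) as a dot product of two sorted sparse
+// columns, A(:,i) and B(:,j).  Its MERGE macro advances two pointers through
+// the sorted row indices and multiplies only where they coincide; when one
+// column has more than 32 times the entries of the other, the kernel instead
+// jumps through the denser column with GB_BINARY_TRIM_SEARCH.  The sketch
+// below isolates the balanced merge; the names are hypothetical and a plain
+// multiply stands in for IDIV.
+//
+//      #include <stdint.h>
+//      #include <stdbool.h>
+//
+//      // returns true and sets *cij if A(:,i)'*B(:,j) has any entry
+//      static bool example_sparse_dot
+//      (
+//          const int64_t *Ai, const int64_t *Ax, int64_t pa, int64_t pa_end,
+//          const int64_t *Bi, const int64_t *Bx, int64_t pb, int64_t pb_end,
+//          int64_t *cij
+//      )
+//      {
+//          bool cij_exists = false ;
+//          int64_t sum = 0 ;
+//          while (pa < pa_end && pb < pb_end)
+//          {
+//              int64_t ia = Ai [pa], ib = Bi [pb] ;
+//              if      (ia < ib) pa++ ;     // A(ia,i) has no match in B(:,j)
+//              else if (ib < ia) pb++ ;     // B(ib,j) has no match in A(:,i)
+//              else                         // ia == ib == k: multiply and add
+//              {
+//                  sum += Ax [pa++] * Bx [pb++] ;
+//                  cij_exists = true ;
+//              }
+//          }
+//          (*cij) = sum ;
+//          return (cij_exists) ;            // false: C(i,j) stays out of C
+//      }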
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_int64 +// A'*B function: GB_AdotB__plus_div_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int8.c new file mode 100644 index 0000000000..976e0b6b59 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_int8: hard-coded C=A*B 
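+//------------------------------------------------------------------------------
+
+// When WITH_ZOMBIES is defined, the masked kernel adopts the Mask's pattern as
+// the pattern of C up front (Maskp is copied into C->p); any Mask entry that
+// receives no value from A*B becomes a "zombie": its row index is stored
+// flipped to a negative value via FLIP, C->nzombies is incremented, and
+// GB_queue_insert places C on the queue of matrices with pending work so the
+// zombies can be pruned later.  The encoding below is only an assumption for
+// illustration (EXAMPLE_FLIP is a hypothetical name; the library's FLIP macro
+// is defined in GB.h and may differ); what matters is that it is its own
+// inverse and never collides with a valid index.
+//
+//      // maps i >= 0 to a value <= -2, and flipping twice restores i
+//      #define EXAMPLE_FLIP(i)       (-(i)-2)
+//      #define EXAMPLE_IS_ZOMBIE(i)  ((i) < 0)
+//
+//      // EXAMPLE_FLIP (0) == -2, EXAMPLE_FLIP (5) == -7, and
+//      // EXAMPLE_FLIP (EXAMPLE_FLIP (i)) == i, so a later pass (such as
+//      // GB_wait) can recover the original row index of each zombie before
+//      // deleting it.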
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_int8 +// A'*B function: GB_AdotB__plus_div_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint16.c new file mode 100644 index 0000000000..a233aaf140 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_uint16: hard-coded C=A*B 
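+// Note: IDIV below is the library's integer-division multiply operator,
+// defined outside this file (GB.h / GB_AxB_methods.h); "flip" swaps its
+// operands when the caller has exchanged A and B.  A minimal sketch of the
+// call pattern, using a hypothetical IDIV_SKETCH whose zero-divisor guard is
+// an assumption for illustration only (the real zero-divisor rule is whatever
+// GB.h defines, not this):
+//
+//      #define IDIV_SKETCH(x,y) ((y) == 0 ? 0 : (x) / (y))
+//      uint16_t t = flip ? IDIV_SKETCH (bkj, aik) : IDIV_SKETCH (aik, bkj) ;
+//      cij += t ;      // the PLUS monoid then accumulates the products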
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_uint16 +// A'*B function: GB_AdotB__plus_div_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if 
not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint32.c new file mode 100644 index 0000000000..52c5ff5d1d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_uint32: hard-coded C=A*B 
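+// Note: with a Mask present, the outer-product phase in this file is a masked
+// Gustavson-style product: Mask(:,j) is scattered into the Flag workspace,
+// and only rows with Flag[i] != 0 accumulate into the dense workspace w.
+// A stripped-down sketch of that inner update (same names as the code below;
+// the empty-column and index-range checks are omitted here):
+//
+//      for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
+//      {
+//          int64_t i = Ai [pa] ;
+//          if (Flag [i] == 0) continue ;               // Mask(i,j) is zero
+//          uint32_t t = flip ? IDIV (bkj, Ax [pa]) : IDIV (Ax [pa], bkj) ;
+//          if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = t ; }  // first entry
+//          else              { w [i] += t ; }                 // accumulate
+//      }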
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_uint32 +// A'*B function: GB_AdotB__plus_div_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if 
not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint64.c new file mode 100644 index 0000000000..55732e2d15 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_uint64: hard-coded C=A*B 
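+// Note: when WITH_ZOMBIES is enabled, C keeps the full pattern of the Mask.
+// Positions present in the Mask but never produced by A*B are stored as
+// "zombies": their row indices are encoded with FLIP(i), C->nzombies is
+// incremented, and GB_queue_insert places C in the queue so a later wait can
+// prune them.  FLIP is assumed here to be the usual involution from GB.h
+// (for example FLIP(i) == -(i)-2, so FLIP(FLIP(i)) == i); the exact
+// definition lives in GB.h, not in this file.
+//
+//      Ci [p] = FLIP (i) ;     // negatively-coded index marks a zombie
+//      C->nzombies++ ;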
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_uint64 +// A'*B function: GB_AdotB__plus_div_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if 
not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint8.c new file mode 100644 index 0000000000..5da995fdc9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_div_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_div_uint8: hard-coded C=A*B 
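+// Note: the A'*B dot-product kernel in this file chooses a strategy per (i,j)
+// pair: both vectors dense, one vector dense, one vector more than 32x
+// sparser than the other (GB_BINARY_TRIM_SEARCH then skips ahead in the
+// denser one), or a plain two-pointer merge.  The balanced case is the
+// classic sorted-list intersection; a sketch (MERGE, defined below,
+// multiplies the matched pair and adds it into cij):
+//
+//      while (pa < pa_end && pb < pb_end)
+//      {
+//          if      (Ai [pa] < Bi [pb]) pa++ ;
+//          else if (Bi [pb] < Ai [pa]) pb++ ;
+//          else MERGE ;        // row indices match: multiply and accumulate
+//      }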
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_div_uint8 +// A'*B function: GB_AdotB__plus_div_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_first_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_fp32.c new file mode 100644 index 0000000000..ca33f5ed16 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_fp32: hard-coded C=A*B 
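+// Note: this semiring multiplies with FIRST, so the product is t = aik and
+// the value of B(k,j) is never used; only B's pattern matters.  Each C(i,j)
+// is therefore the sum of A(i,k) over all k for which both A(i,k) and B(k,j)
+// are present, which is why bkj is read but unused in the loops below.
+//
+//      float t = aik ;     // FIRST(x,y) == x: B contributes pattern only
+//      cij += t ;          // PLUS monoid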
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_fp32 +// A'*B function: GB_AdotB__plus_first_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, 
msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if 
(Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_first_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_fp64.c new file mode 100644 index 0000000000..4443d7c69d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_fp64 +// A'*B function: GB_AdotB__plus_first_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
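+    // A small worked example of this semiring (values chosen only for
+    // illustration, not taken from any test matrix): FIRST ignores its
+    // second argument, so each C(i,j) is simply the sum of the entries
+    // A(i,k) over all k for which both A(i,k) and B(k,j) are present.  If
+    // A(3,1) = 10 and A(3,4) = 20, and column 7 of B has entries in rows 1
+    // and 4 (of any value), then C(3,7) = 10 + 20 = 30.  This is also why
+    // bkj is read but never used in the updates below.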
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_first_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int16.c new file mode 100644 index 0000000000..4a0f567f91 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_int16 +// A'*B function: GB_AdotB__plus_first_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
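+    // How the workspace below is used (a summary of the code that follows):
+    // w has one slot per row of C and accumulates a single column C(:,j) at
+    // a time; Flag records which rows are admitted by Mask(:,j); and the
+    // gather step copies w back into C in the order of the Mask (or, when
+    // there is no Mask, the symbolic) pattern.  In the WITH_ZOMBIES variant,
+    // a Mask entry that receives no contribution from A*B is kept as a
+    // placeholder whose row index is encoded by FLIP and counted in
+    // C->nzombies, to be pruned later; the exact encoding is whatever FLIP
+    // uses elsewhere in GB.h.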
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_first_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int32.c new file mode 100644 index 0000000000..579570edff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_int32 +// A'*B function: GB_AdotB__plus_first_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
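+    // This kernel is not called directly by user code: the built-in multiply
+    // dispatcher (GB_AxB_builtin) selects it when GrB_mxm is given a
+    // matching built-in semiring.  Assuming the predefined
+    // GxB_PLUS_FIRST_INT32 semiring of this library version, a user-level
+    // call that would reach this function looks like:
+    //
+    //      GrB_mxm (C, NULL, NULL, GxB_PLUS_FIRST_INT32, A, B, NULL) ;
+    //
+    // with A, B, and C all of type GrB_INT32.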
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_first_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int64.c new file mode 100644 index 0000000000..e5ff679391 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_int64 +// A'*B function: GB_AdotB__plus_first_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
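+    // Note on the #ifndef GBCOMPACT guard above: these Generated/ kernels
+    // exist purely for speed.  When the library is compiled with
+    // -DGBCOMPACT, this whole file compiles to nothing and the same GrB_mxm
+    // result is produced by the slower generic multiply instead, a
+    // trade-off that shortens compile time and shrinks the library.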
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_first_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int8.c new file mode 100644 index 0000000000..dd60a40fc1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_int8 +// A'*B function: GB_AdotB__plus_first_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
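    // With the PLUS_FIRST semiring the multiplier returns its first argument,
    // so B's numerical values never enter the computation: C(i,j) is the sum
    // of A(i,k) over all k for which both A(i,k) and B(k,j) are present in
    // the pattern.  A minimal usage sketch follows (an illustration only; it
    // assumes the predefined GxB_PLUS_FIRST_INT8 semiring and the standard
    // GrB_mxm call, neither of which appears in this patch):
    //
    //      #include "GraphBLAS.h"
    //      GrB_Matrix C, A, B ;    // C is m-by-n, A is m-by-k, B is k-by-n,
    //                              // all of type GrB_INT8, created elsewhere
    //      // C = A*B over (plus,first); when GBCOMPACT is not defined this
    //      // can reach the hard-coded workers in this file
    //      GrB_mxm (C, NULL, NULL, GxB_PLUS_FIRST_INT8, A, B, NULL) ;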
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint16.c new file mode 100644 index 0000000000..843b3177a3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_uint16 +// A'*B function: GB_AdotB__plus_first_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
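    // The dot-product worker above selects one of six merge strategies for
    // each C(i,j): both A(:,i) and B(:,j) dense (direct loop over all rows),
    // only A(:,i) dense, only B(:,j) dense (index directly into the dense
    // vector), one pattern more than 32 times sparser than the other
    // (advance through the denser pattern with GB_BINARY_TRIM_SEARCH, a
    // trimming binary search), or comparable sparsity (plain two-pointer
    // merge).  The factor of 32 is a heuristic cutoff; whenever a row index
    // is found in both patterns, the MERGE macro applies the multiply-add
    // for that entry.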
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint32.c new file mode 100644 index 0000000000..5300cdf7dc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_uint32 +// A'*B function: GB_AdotB__plus_first_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
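    // The unmasked branch below is a column-at-a-time Gustavson (saxpy-style)
    // multiply: for each column j of B, the contributions A(:,k)*B(k,j) are
    // accumulated into the dense workspace w, and w is then gathered back
    // into C(:,j) using the pattern already computed by GB_AxB_symbolic in
    // C->p and C->i.  The masked branch takes its pattern from the Mask
    // instead, scattering Mask(:,j) into the Flag workspace so that only
    // entries permitted by the Mask are accumulated.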
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint64.c new file mode 100644 index 0000000000..180e522242 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_uint64 +// A'*B function: GB_AdotB__plus_first_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
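    // In the masked branch below, when WITH_ZOMBIES is defined, C adopts the
    // Mask's pattern up front (Maskp is copied into C->p): any Mask entry
    // that receives no contribution from A*B is kept as a zombie, its row
    // index marked with FLIP(i) and C->nzombies incremented, and the matrix
    // is placed on the queue via GB_queue_insert so the zombies can be
    // pruned later.  Without WITH_ZOMBIES, only live entries are copied and
    // the column pointers Cp are rebuilt on the fly from cnz.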
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint8.c new file mode 100644 index 0000000000..d978dfba85 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_first_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_first_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_first_uint8 +// A'*B function: GB_AdotB__plus_first_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
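    // Each of these Generated/ files is wrapped in "#ifndef GBCOMPACT":
    // building the library with GBCOMPACT defined omits the hard-coded
    // workers such as the one below, and GraphBLAS falls back to its generic
    // kernels, trading some performance for a much smaller compiled library.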
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp32.c new file mode 100644 index 0000000000..8be2c89a18 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_fp32 +// A'*B function: GB_AdotB__plus_iseq_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp64.c new file mode 100644 index 0000000000..0f452c4cd4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_fp64 +// A'*B function: GB_AdotB__plus_iseq_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int16.c new file mode 100644 index 0000000000..26b24a1c6d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_int16 +// A'*B function: GB_AdotB__plus_iseq_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int32.c new file mode 100644 index 0000000000..8a8ea5991b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_int32 +// A'*B function: GB_AdotB__plus_iseq_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int64.c new file mode 100644 index 0000000000..4825217ac3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_int64 +// A'*B function: GB_AdotB__plus_iseq_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int8.c new file mode 100644 index 0000000000..34119c94e7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_int8 +// A'*B function: GB_AdotB__plus_iseq_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
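+    // An informal illustration of the PLUS_ISEQ semiring used in this file
+    // (the sample values below are made up, not taken from any demo matrix):
+    // the multiply is ISEQ, t = (aik == bkj), which is 1 when the two entries
+    // are equal and 0 otherwise, and the add is PLUS, so C(i,j) counts how
+    // many k with both A(i,k) and B(k,j) present satisfy A(i,k) == B(k,j).
+    // For example, if A(i,:) = [2 5 7] and B(:,j) = [2 4 7] share the same
+    // pattern, then cij = 1 + 0 + 1 = 2.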
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint16.c new file mode 100644 index 0000000000..3a9264020b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_uint16 +// A'*B function: GB_AdotB__plus_iseq_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
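+    // Informal summary of the outer-product method below: w acts as a dense
+    // accumulator with one slot per row of C.  For each column j, the loop
+    // over B(:,j) scatters partial sums w [i] += (A(i,k) == B(k,j)) into w,
+    // and the gather phase then copies w back into Cx on the pattern of
+    // C(:,j), taken either from the Mask (the branch with zombies) or from
+    // the prior symbolic analysis (the Mask == NULL branch).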
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint32.c new file mode 100644 index 0000000000..aea50eaaf8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_uint32 +// A'*B function: GB_AdotB__plus_iseq_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
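+    // Informal note on zombies in the masked branch below: when WITH_ZOMBIES
+    // is defined, C takes its pattern directly from the Mask.  An entry that
+    // is in the Mask but receives no contribution from A*B is kept as a
+    // zombie: its value is set to 0, its row index is stored as FLIP (i),
+    // and C->nzombies is incremented, so the zombies can be pruned later.
+    // Without WITH_ZOMBIES, only live entries are appended to C and Cp is
+    // rebuilt column by column.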
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint64.c new file mode 100644 index 0000000000..5f9e2ccffe --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_uint64 +// A'*B function: GB_AdotB__plus_iseq_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
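+    // Informal note on the dot-product kernel GB_AdotB__plus_iseq_uint64
+    // defined later in this file: each cij = A(:,i)'*B(:,j) is computed by
+    // one of several strategies: both vectors dense (a flat loop of length
+    // nrows), exactly one vector dense (index into it directly while scanning
+    // the sparse one), one vector far sparser than the other (the 32x
+    // heuristic, with GB_BINARY_TRIM_SEARCH skipping runs of the denser
+    // pattern), or comparable sparsity (a two-pointer merge via MERGE).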
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint8.c new file mode 100644 index 0000000000..e96302b81a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_iseq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_iseq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_iseq_uint8 +// A'*B function: GB_AdotB__plus_iseq_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
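The kernel below uses the workspace w declared next as a dense accumulator for the classic Gustavson outer-product: each entry B(k,j) selects column A(:,k), partial products are scattered into w, and w is then gathered back onto the precomputed pattern Cp/Ci of C(:,j). For this PLUS_ISEQ semiring the "multiply" is the comparison aik == bkj and the "add" is ordinary addition. The following is a minimal standalone sketch of that scatter step for a single B(k,j), assuming CSC arrays Ap/Ai/Ax and a workspace already cleared on the pattern of C(:,j); the function name and argument list are illustrative, not the library's API.

#include <stdint.h>

/* Sketch: w += A(:,k) "*" bkj for one entry B(k,j) under PLUS_ISEQ_UINT8,
   where the multiply is t = (aik == bkj) and the add is w [i] += t.
   Ap/Ai/Ax hold A in compressed-column form; w has A->nrows entries and
   is assumed cleared on the pattern of C(:,j), as the generated code does. */
static void scatter_iseq_uint8
(
    const int64_t *Ap, const int64_t *Ai, const uint8_t *Ax,
    int64_t k,                  /* column of A selected by B(k,j) */
    uint8_t bkj,                /* value of B(k,j) */
    uint8_t *w                  /* dense accumulator */
)
{
    for (int64_t p = Ap [k] ; p < Ap [k+1] ; p++)
    {
        int64_t i = Ai [p] ;
        uint8_t t = (Ax [p] == bkj) ;   /* multiply: ISEQ */
        w [i] += t ;                    /* add: PLUS */
    }
}

After all entries of B(:,j) have been scattered, the kernel copies w [Ci [p]] back into Cx for p in Cp [j] .. Cp [j+1]-1, so positions of C(:,j) that were never updated keep the additive identity 0.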
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp32.c new file mode 100644 index 0000000000..0dabbb64d3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_fp32 +// A'*B function: GB_AdotB__plus_isge_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
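In the masked branch of the kernel below, Mask(:,j) is first scattered into the byte workspace Flag; each update to w then proceeds only where Flag [i] is nonzero, and Flag [i] is set to -1 the first time C(i,j) receives a value. Under WITH_ZOMBIES the gather walks Mask(:,j): positions that were never written become zombies, placeholder entries whose row index is stored in a complemented ("flipped") form so they can be deleted later. Here is a compact sketch of that gather for one column; flip(i) = -(i)-2 is only an illustrative encoding standing in for the library's FLIP macro, and the argument names are assumptions.

#include <stdint.h>

/* Illustrative zombie encoding; the library's own FLIP macro may differ. */
static inline int64_t flip (int64_t i) { return (-i - 2) ; }

/* Sketch of the WITH_ZOMBIES gather for column j: every Mask entry yields
   a C entry; entries never touched by the scatter phase become zombies
   (flipped row index) and are counted so nzombies can be updated. */
static int64_t gather_with_zombies
(
    const int64_t *Maskp, const int64_t *Maski, int64_t j,
    int8_t *Flag,               /* -1 where C(i,j) is alive, else 0 */
    const float *w,             /* accumulated values */
    float *Cx, int64_t *Ci      /* C(:,j) shares the Mask's pattern */
)
{
    int64_t nzombies = 0 ;
    for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++)
    {
        int64_t i = Maski [p] ;
        if (Flag [i] < 0)
        {
            Cx [p] = w [i] ;        /* live entry */
            Ci [p] = i ;
        }
        else
        {
            Cx [p] = 0 ;            /* zombie: in the Mask, not in A*B */
            Ci [p] = flip (i) ;
            nzombies++ ;
        }
        Flag [i] = 0 ;              /* reset Flag for the next column */
    }
    return (nzombies) ;
}

Keeping zombies lets the kernel reuse the Mask's column pointers for C->p verbatim; the non-zombie build (the #else branch) instead compacts C(:,j) and writes its own Cp as it goes.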
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp64.c new file mode 100644 index 0000000000..b64831d3ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_fp64 +// A'*B function: GB_AdotB__plus_isge_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
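The dot-product kernel GB_AdotB__plus_isge_fp64 further below computes each C(i,j) = A(:,i)'*B(:,j) and chooses a strategy per pair of columns: if both vectors are dense it runs a straight loop over all nrows positions, if exactly one is dense it walks the sparse one and indexes the dense one by row, if one pattern is far sparser it jumps through the longer one with a binary search, and otherwise it does a plain two-pointer merge. A standalone sketch of the dense-times-sparse shortcut for the PLUS_ISGE semiring is shown here; the function and argument names are illustrative only.

#include <stdint.h>

/* Sketch: cij = sum over k of (aki >= bkj), where A(:,i) is dense with its
   nrows values stored contiguously at Ax + pa, and B(:,j) is sparse with
   row indices Bi [pb .. pb_end-1] and matching values in Bx. */
static double dot_dense_sparse_isge
(
    const double *Ax, int64_t pa,               /* dense A(:,i) */
    const int64_t *Bi, const double *Bx,
    int64_t pb, int64_t pb_end                  /* sparse B(:,j) */
)
{
    double cij = 0 ;
    for ( ; pb < pb_end ; pb++)
    {
        int64_t k = Bi [pb] ;
        cij += (Ax [pa + k] >= Bx [pb]) ;       /* multiply ISGE, add PLUS */
    }
    return (cij) ;
}

When both vectors are dense the same accumulation simply runs over k = 0 .. nrows-1 with both operands indexed directly, which is the first branch of the generated code.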
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int16.c new file mode 100644 index 0000000000..55c2c56d0d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_int16 +// A'*B function: GB_AdotB__plus_isge_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
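The merge loops in the dot-product kernel below switch strategy when one pattern is more than 32 times denser than the other: rather than advancing through the long list one entry at a time, a binary "trim" search moves its pointer directly to the first row index that is >= the index at the head of the short list. A plain lower-bound binary search, sketched here, is enough to show the idea; it stands in for the library's GB_BINARY_TRIM_SEARCH macro (an assumption about its effect, since the macro trims its bounds in place rather than returning a value).

#include <stdint.h>

/* Return the smallest p in [pleft, pright) with List [p] >= target, or
   pright if there is none.  Stand-in for GB_BINARY_TRIM_SEARCH. */
static int64_t trim_search (int64_t target, const int64_t *List,
    int64_t pleft, int64_t pright)
{
    while (pleft < pright)
    {
        int64_t pmiddle = pleft + (pright - pleft) / 2 ;
        if (List [pmiddle] < target)
        {
            pleft = pmiddle + 1 ;   /* List [pleft .. pmiddle] < target */
        }
        else
        {
            pright = pmiddle ;      /* List [pmiddle] is still a candidate */
        }
    }
    return (pleft) ;
}

In the generated code the search starts at pa + 1 (or pb + 1) because the head entry is already known to be too small, which is why the ASSERT (pleft > pa) after the search always holds.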
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int32.c new file mode 100644 index 0000000000..d6cf024905 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_int32 +// A'*B function: GB_AdotB__plus_isge_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
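The MERGE macro used throughout these dot-product kernels does two things for a matched row index k: it applies the semiring (multiply, then add into cij), and it records in cij_exists whether C(i,j) has appeared at all, since an entry is only written to C when the two patterns intersect or one of the dense shortcuts fires. A minimal sketch of that merge over two sorted index lists, specialised to the PLUS_ISGE semiring on int32_t values, is given below; the names are illustrative and do not come from the library.

#include <stdint.h>
#include <stdbool.h>

/* Sketch: merge the sorted patterns of A(:,i) and B(:,j) and accumulate
   cij += (aki >= bkj) over their intersection.  Returns true only if the
   patterns intersect, i.e. if C(i,j) should be added to the result. */
static bool dot_merge_isge_int32
(
    const int64_t *Ai, const int32_t *Ax, int64_t pa, int64_t pa_end,
    const int64_t *Bi, const int32_t *Bx, int64_t pb, int64_t pb_end,
    int32_t *cij_result
)
{
    bool cij_exists = false ;
    int32_t cij = 0 ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib) pa++ ;         /* A(ia,i) has no match in B(:,j) */
        else if (ib < ia) pb++ ;    /* B(ib,j) has no match in A(:,i) */
        else
        {
            /* ia == ib == k: apply the semiring to A(k,i) and B(k,j) */
            cij += (Ax [pa++] >= Bx [pb++]) ;
            cij_exists = true ;
        }
    }
    (*cij_result) = cij ;
    return (cij_exists) ;
}

Because the PLUS monoid's identity is 0, starting cij at 0 and always accumulating is equivalent to the macro's "first hit assigns, later hits accumulate" form; the separate cij_exists flag is what keeps entries out of C(:,j) when the patterns never meet.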
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int64.c new file mode 100644 index 0000000000..c9162ebbab --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_int64 +// A'*B function: GB_AdotB__plus_isge_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
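+ // Outline of this worker: C=A*B is computed one column at a time, using
+ // w as a dense gather/scatter accumulator.  If a Mask is present, the
+ // pattern of C is restricted to the Mask (the WITH_ZOMBIES variant keeps
+ // unmatched Mask entries as zombies); otherwise the pattern of C has
+ // already been computed by GB_AxB_symbolic and only the values are
+ // computed here.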
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int8.c new file mode 100644 index 0000000000..3d58bbf44d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_int8 +// A'*B function: GB_AdotB__plus_isge_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
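+ // Semiring note: the ISGE multiply yields t = (aik >= bkj), which is 0
+ // or 1, and the PLUS monoid accumulates those results, so each C(i,j)
+ // counts the k for which both A(i,k) and B(k,j) are present in the
+ // pattern and A(i,k) >= B(k,j).  The sum is carried in int8_t, so it may
+ // overflow for columns with many entries.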
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint16.c new file mode 100644 index 0000000000..eef08917e5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_uint16 +// A'*B function: GB_AdotB__plus_isge_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
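+ // In the masked phase below, Mask(:,j) is scattered into the Flag
+ // workspace lazily, only once some A(:,k) overlaps the Mask's row range
+ // (scatter_mask, from GB_AxB_methods.h, presumably marks Flag [i] > 0
+ // where the Mask entry casts to true).  Flag [i] is then set to -1 the
+ // first time C(i,j) is accumulated into w [i], and reset to 0 when
+ // C(:,j) is gathered.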
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint32.c new file mode 100644 index 0000000000..62407fddb8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_uint32 +// A'*B function: GB_AdotB__plus_isge_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
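+ // The WITH_ZOMBIES ifdef selects how the masked result is assembled:
+ // with zombies, C->p is copied verbatim from Maskp and Mask entries not
+ // found in A*B are kept as zombies (row index FLIP'ed, value zero, and
+ // C placed in the queue via GB_queue_insert); without zombies, live
+ // entries are compacted and Cp is rebuilt from the running count cnz.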
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint64.c new file mode 100644 index 0000000000..16753b7128 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_uint64 +// A'*B function: GB_AdotB__plus_isge_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
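+ // The companion dot-product kernel GB_AdotB__plus_isge_uint64 below
+ // merges A(:,i) with B(:,j) adaptively: fully dense columns use direct
+ // indexing, and when one column has more than 32 times the entries of
+ // the other, GB_BINARY_TRIM_SEARCH skips ahead in the denser column;
+ // otherwise a standard two-pointer merge is used.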
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
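+    // (Cp is the column-pointer array of C: column j occupies
+    // Ci [Cp [j] ... Cp [j+1]-1] and Cx likewise, so Cp [n] = cnz records
+    // the total number of entries in C.)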
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint8.c new file mode 100644 index 0000000000..ae2957e271 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isge_uint8 +// A'*B function: GB_AdotB__plus_isge_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
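+    // The outer-product method below works one column of C at a time: for
+    // each present entry B(k,j), the entries of column A(:,k) are combined
+    // with B(k,j) and accumulated into the dense workspace w (a scatter),
+    // and the finished column is then gathered from w into C(:,j), using
+    // either the Mask pattern or the pattern computed symbolically.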
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp32.c new file mode 100644 index 0000000000..07d6407657 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_fp32 +// A'*B function: GB_AdotB__plus_isgt_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
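+    // When a Mask is given, the pattern of C is taken from the Mask rather
+    // than from a prior symbolic analysis.  In the WITH_ZOMBIES variant,
+    // Maskp is copied into C->p, and any mask entry that receives no
+    // contribution from A*B is kept as a zombie: its row index is stored as
+    // FLIP (i), C->nzombies is incremented, and C is placed on the queue
+    // (GB_queue_insert) so the zombies can be deleted later.  Without
+    // zombies, such entries are simply not written to C.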
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if 
it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp64.c new file mode 100644 index 0000000000..05e631995c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_fp64 +// A'*B function: GB_AdotB__plus_isgt_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
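+    // The int8_t Flag workspace tracks each row index i within the current
+    // column j of the Mask: 0 means Mask(i,j) is not 1 (absent or false),
+    // a positive value means the mask allows C(i,j) but nothing has been
+    // accumulated yet, and -1 means w [i] already holds a partial result
+    // for C(i,j).  The flags touched in column j are reset to 0 during the
+    // gather step, leaving the workspace clean for the next column.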
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int16.c new file mode 100644 index 0000000000..8c2983423f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_int16 +// A'*B function: GB_AdotB__plus_isgt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
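+    // In the dot-product variant (GB_AdotB below), each C(i,j) is the merge
+    // of the sorted index lists of A(:,i) and B(:,j).  Four cases are
+    // handled: both columns dense, one dense and one sparse, one column more
+    // than 32x sparser than the other (the denser list is advanced with
+    // GB_BINARY_TRIM_SEARCH), and otherwise a plain linear merge of the two
+    // lists.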
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int32.c new file mode 100644 index 0000000000..13ba5b2bab --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_int32 +// A'*B function: GB_AdotB__plus_isgt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
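+ // w is the column workspace for the outer-product method: entries of
+ // C(:,j) are accumulated in w as w [i] += (A(i,k) > B(k,j)), and then
+ // gathered into Cx, either through the pattern of Mask(:,j) (masked
+ // case) or the pattern of C(:,j) computed by GB_AxB_symbolic (unmasked
+ // case).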
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int64.c new file mode 100644 index 0000000000..e6b45a1e7a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_int64 +// A'*B function: GB_AdotB__plus_isgt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
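+ // In the masked phase below, Flag [i] encodes the state of row i of
+ // Mask(:,j): zero means C(i,j) is not permitted by the Mask, a positive
+ // value means it is permitted but has not yet received a contribution,
+ // and a negative value means it already holds a partial sum in w.  When
+ // WITH_ZOMBIES is defined, Mask entries that never receive a
+ // contribution from A*B are kept as zombies, with row index FLIP (i).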
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int8.c new file mode 100644 index 0000000000..b810e331c9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_int8 +// A'*B function: GB_AdotB__plus_isgt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
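+ // The companion dot-product kernel GB_AdotB__plus_isgt_int8 (below)
+ // computes each C(i,j) = A(:,i)'*B(:,j) with a strategy chosen per pair:
+ // both vectors dense, exactly one dense, one vector more than 32 times
+ // sparser than the other (advanced with GB_BINARY_TRIM_SEARCH), or a
+ // plain two-pointer merge when the two sparsities are comparable.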
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint16.c new file mode 100644 index 0000000000..b8878b1ca8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_uint16 +// A'*B function: GB_AdotB__plus_isgt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
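+ // With the PLUS_ISGT semiring each multiply yields 0 or 1, so a dot
+ // product simply counts the positions k where A(k,i) > B(k,j); for
+ // example, A(:,i) = [1 5 3] and B(:,j) = [2 4 3] give
+ // cij = (1>2) + (5>4) + (3>3) = 0 + 1 + 0 = 1.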
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint32.c new file mode 100644 index 0000000000..655f101ec1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_uint32 +// A'*B function: GB_AdotB__plus_isgt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
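+ // cij starts at 0, the additive identity of PLUS, whenever a dense
+ // A(:,i) or B(:,j) guarantees that C(i,j) will exist; in the
+ // sparse-times-sparse merges, cij_exists records whether any index k
+ // with both A(k,i) and B(k,j) present has been seen, and C(i,j) is
+ // appended to the pattern only if it has.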
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint64.c new file mode 100644 index 0000000000..3aa92a756c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_uint64 +// A'*B function: GB_AdotB__plus_isgt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
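+    // Note on the semiring: the "multiply" here is the ISGT comparison and the
+    // "add" is integer plus, so each entry of C counts how many k satisfy
+    // A(i,k) > B(k,j).  A minimal scalar sketch of one multiply-add step,
+    // using the same local names as the loops below:
+    //
+    //      uint64_t t = (aik > bkj) ;   // ISGT: 1 if A(i,k) > B(k,j), else 0
+    //      w [i] += t ;                 // PLUS monoid accumulates the count
+    //
+    // The dense workspace w declared below holds one such running sum per row
+    // of C for the current column j (a Gustavson-style saxpy formulation).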
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint8.c new file mode 100644 index 0000000000..bddc08e778 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isgt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isgt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isgt_uint8 +// A'*B function: GB_AdotB__plus_isgt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
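+    // Note on the masked phase below: for the current column j, Flag [i]
+    // encodes the state of row i -- 0 means Mask(i,j) is not present (the
+    // update is skipped), a positive value means Mask(i,j) is present but
+    // C(i,j) has not been touched yet, and -1 means w [i] already holds a
+    // partial sum.  When WITH_ZOMBIES is defined, C inherits the Mask's
+    // pattern; entries in the Mask that A*B never produces are kept as
+    // zombies (row index stored as FLIP (i), C->nzombies incremented) and are
+    // pruned later when pending work on C is finished.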
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp32.c new file mode 100644 index 0000000000..04e4b2b0ea --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_fp32 +// A'*B function: GB_AdotB__plus_isle_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
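+    // Note on the semiring: ISLE is the "is less than or equal" operator that
+    // returns a result of the same type as its inputs, so for fp32 the
+    // multiply t = (aik <= bkj) yields 1.0f or 0.0f and the PLUS monoid counts
+    // how many of the comparisons hold.  A minimal scalar sketch of one step,
+    // using the same local names as the loops below:
+    //
+    //      float t = (aik <= bkj) ;     // ISLE: 1.0f if A(i,k) <= B(k,j)
+    //      w [i] += t ;                 // PLUS accumulates the count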
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp64.c new file mode 100644 index 0000000000..cb2ad50b50 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_fp64 +// A'*B function: GB_AdotB__plus_isle_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
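+    // Note on the dot-product variant: the companion GB_AdotB__plus_isle_fp64
+    // below computes C=A'*B one dot product at a time and picks a strategy per
+    // pair of columns A(:,i) and B(:,j): if both are dense it walks all nrows
+    // entries; if only one is dense it walks the sparse one and indexes the
+    // dense one directly; if one has more than 32 times the entries of the
+    // other it skips ahead in the denser column with GB_BINARY_TRIM_SEARCH
+    // instead of a linear merge; otherwise it uses an ordinary two-pointer
+    // merge.  The factor of 32 is a heuristic cutoff, not a requirement of
+    // the method.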
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int16.c new file mode 100644 index 0000000000..6a92d15c27 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_int16 +// A'*B function: GB_AdotB__plus_isle_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
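+    // Note on usage: kernels like this one are reached through GrB_mxm when a
+    // matching built-in semiring is selected and GBCOMPACT is not defined.  A
+    // minimal sketch, assuming the predefined semiring is named
+    // GxB_PLUS_ISLE_INT16 per the GxB_<add>_<mult>_<type> convention (check
+    // GraphBLAS.h for the exact identifier):
+    //
+    //      GrB_Matrix C, A, B ;
+    //      // ... create A, B, and C with GrB_INT16 entries ...
+    //      GrB_mxm (C, NULL, NULL, GxB_PLUS_ISLE_INT16, A, B, NULL) ;
+    //      // the library may then dispatch to GB_AxB__plus_isle_int16, or to
+    //      // GB_AdotB__plus_isle_int16 when A is transposed via a descriptor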
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int32.c new file mode 100644 index 0000000000..99386cd9af --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_int32 +// A'*B function: GB_AdotB__plus_isle_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
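For illustration only (this aside is not part of the generated file or of the patch itself): the PLUS_ISLE semiring named in the header above replaces the usual multiply with the comparison t = (aik <= bkj) and accumulates with ordinary addition, so each C(i,j) counts the positions k at which A(k,i) <= B(k,j). A minimal standalone sketch with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
        // two dense "columns" A(:,i) and B(:,j) of length 4 (made-up values)
        int32_t a [4] = { 1, 5, 3, 7 } ;
        int32_t b [4] = { 2, 4, 3, 6 } ;

        int32_t cij = 0 ;                   // additive identity of PLUS
        for (int k = 0 ; k < 4 ; k++)
        {
            int32_t t = (a [k] <= b [k]) ;  // ISLE multiply: 1 if a<=b, else 0
            cij += t ;                      // PLUS add
        }
        printf ("cij = %d\n", cij) ;        // prints 2: positions 0 and 2
        return (0) ;
    }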
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int64.c new file mode 100644 index 0000000000..de9a7f06a5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_int64 +// A'*B function: GB_AdotB__plus_isle_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
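For illustration only: the unmasked branch of this kernel is a saxpy-style outer product. For each entry B(k,j) it accumulates the compared column A(:,k) into the dense workspace w, then gathers the finished column of C. The sketch below is simplified in that it writes the whole dense column instead of gathering through the precomputed symbolic pattern Cp/Ci, and the CSC arrays in main are hypothetical.

    #include <stdint.h>
    #include <stdio.h>

    // one column of C = A*B: Ap/Ai/Ax hold A in CSC form, and column j of B
    // is given by the index/value lists Bi_j/Bx_j of length bjnz
    static void saxpy_column (int64_t nrows,
        const int64_t *Ap, const int64_t *Ai, const int64_t *Ax,
        const int64_t *Bi_j, const int64_t *Bx_j, int64_t bjnz,
        int64_t *w)                         // dense accumulator, length nrows
    {
        for (int64_t i = 0 ; i < nrows ; i++) w [i] = 0 ;   // identity of PLUS
        for (int64_t p = 0 ; p < bjnz ; p++)
        {
            int64_t k   = Bi_j [p] ;        // B(k,j) is present
            int64_t bkj = Bx_j [p] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                int64_t i = Ai [pa] ;
                w [i] += (Ax [pa] <= bkj) ; // ISLE multiply, PLUS add
            }
        }
    }

    int main (void)
    {
        // A is 3-by-2 in CSC: column 0 has entries in rows 0 and 2, column 1 in row 1
        int64_t Ap [3] = { 0, 2, 3 } ;
        int64_t Ai [3] = { 0, 2, 1 } ;
        int64_t Ax [3] = { 4, 1, 9 } ;
        // one sparse column B(:,j) with entries B(0,j)=5 and B(1,j)=2
        int64_t Bi_j [2] = { 0, 1 } ;
        int64_t Bx_j [2] = { 5, 2 } ;
        int64_t w [3] ;
        saxpy_column (3, Ap, Ai, Ax, Bi_j, Bx_j, 2, w) ;
        for (int64_t i = 0 ; i < 3 ; i++)
        {
            printf ("C(%ld,j) = %ld\n", (long) i, (long) w [i]) ;  // prints 1, 0, 1
        }
        return (0) ;
    }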
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int8.c new file mode 100644 index 0000000000..5f6842f4e9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_int8 +// A'*B function: GB_AdotB__plus_isle_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
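For illustration only: in the masked branch, Mask entries that receive no contribution from A*B are kept as zombies. Their row index is stored in flipped (negative) form so the entry can be pruned later without reshaping C. This sketch assumes the conventional encoding FLIP(i) = -(i)-2; the library's own macro in GB.h is the authoritative definition.

    #include <stdint.h>
    #include <assert.h>
    #include <stdio.h>

    #define FLIP(i)      (-(i)-2)
    #define IS_ZOMBIE(i) ((i) < 0)
    #define UNFLIP(i)    (IS_ZOMBIE (i) ? FLIP (i) : (i))

    int main (void)
    {
        int64_t i = 42 ;                // a live row index
        int64_t z = FLIP (i) ;          // mark it as a zombie: z = -44
        assert (IS_ZOMBIE (z)) ;
        assert (UNFLIP (z) == i) ;      // FLIP is its own inverse
        assert (UNFLIP (i) == i) ;      // live indices pass through unchanged
        printf ("zombie encoding of %ld is %ld\n", (long) i, (long) z) ;
        return (0) ;
    }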
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint16.c new file mode 100644 index 0000000000..86585601f5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_uint16 +// A'*B function: GB_AdotB__plus_isle_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
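For illustration only: the dot-product routine computes C(i,j) = A(:,i)'*B(:,j) by merging the two sorted row-index lists and applying the ISLE multiply and PLUS add only where the indices coincide. The ainz > 32*bjnz and bjnz > 32*ainz cases accelerate the same merge with a trimmed binary search. A standalone sketch of the plain merge, with hypothetical data:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    // sparse dot product under PLUS_ISLE: Ai and Bi are sorted and duplicate-free
    static bool sparse_dot (const int64_t *Ai, const uint16_t *Ax, int64_t anz,
                            const int64_t *Bi, const uint16_t *Bx, int64_t bnz,
                            uint16_t *cij)
    {
        bool exists = false ;
        uint16_t sum = 0 ;
        int64_t pa = 0, pb = 0 ;
        while (pa < anz && pb < bnz)
        {
            if      (Ai [pa] < Bi [pb]) pa++ ;      // A entry has no match in B
            else if (Bi [pb] < Ai [pa]) pb++ ;      // B entry has no match in A
            else
            {
                sum += (Ax [pa] <= Bx [pb]) ;       // ISLE multiply, PLUS add
                exists = true ;
                pa++ ; pb++ ;
            }
        }
        if (exists) *cij = sum ;
        return (exists) ;                           // false: C(i,j) not in the pattern
    }

    int main (void)
    {
        int64_t  Ai [3] = { 0, 3, 7 } ;  uint16_t Ax [3] = { 2, 5, 1 } ;
        int64_t  Bi [2] = { 3, 7 } ;     uint16_t Bx [2] = { 4, 9 } ;
        uint16_t cij ;
        if (sparse_dot (Ai, Ax, 3, Bi, Bx, 2, &cij))
        {
            printf ("cij = %u\n", (unsigned) cij) ; // prints 1: only row 7 has A <= B
        }
        return (0) ;
    }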
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint32.c new file mode 100644 index 0000000000..851d696041 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_uint32 +// A'*B function: GB_AdotB__plus_isle_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint64.c new file mode 100644 index 0000000000..76fc7f1291 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_uint64 +// A'*B function: GB_AdotB__plus_isle_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint8.c new file mode 100644 index 0000000000..dcb7a46b23 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isle_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isle_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isle_uint8 +// A'*B function: GB_AdotB__plus_isle_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp32.c new file mode 100644 index 0000000000..9b79ce2cdc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_fp32 +// A'*B function: GB_AdotB__plus_islt_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if 
it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp64.c new file mode 100644 index 0000000000..0644136589 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_fp64 +// A'*B function: GB_AdotB__plus_islt_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int16.c new file mode 100644 index 0000000000..4033761076 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_int16 +// A'*B function: GB_AdotB__plus_islt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
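/* A minimal standalone sketch of the PLUS_ISLT semiring arithmetic used by
   this generated kernel: the "multiply" is the comparison (aik < bkj), which
   yields 0 or 1, and the "add" is ordinary integer addition with identity 0.
   Assumes only <stdint.h> and <stdio.h>; the name plus_islt_dot is
   illustrative and is not part of the library. */

#include <stdint.h>
#include <stdio.h>

/* one dot product cij = sum over k of (a [k] < b [k]) */
static int16_t plus_islt_dot (const int16_t *a, const int16_t *b, int64_t n)
{
    int16_t cij = 0 ;                       /* identity of PLUS */
    for (int64_t k = 0 ; k < n ; k++)
    {
        int16_t t = (a [k] < b [k]) ;       /* "multiply": 0 or 1 */
        cij += t ;                          /* "add": accumulate */
    }
    return (cij) ;
}

int main (void)
{
    int16_t a [4] = { 1, 5, 2, 9 } ;
    int16_t b [4] = { 3, 4, 2, 10 } ;
    printf ("%d\n", (int) plus_islt_dot (a, b, 4)) ;    /* terms 1,0,0,1: prints 2 */
    return (0) ;
}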
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int32.c new file mode 100644 index 0000000000..a220b5bc5a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_int32 +// A'*B function: GB_AdotB__plus_islt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
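/* A minimal sketch of the lower-bound search that the skewed merge in the
   dot-product kernel relies on: when one sparse vector has more than 32
   times the entries of the other, the denser index list is advanced with a
   binary search rather than one entry at a time.  The plain search below
   stands in for GB_BINARY_TRIM_SEARCH, and the name skip_ahead is
   illustrative only. */

#include <stdint.h>

/* return the smallest p in [pleft..pright] with Idx [p] >= target, or
   pright+1 if every entry is smaller; Idx is sorted ascending */
static int64_t skip_ahead (const int64_t *Idx, int64_t pleft, int64_t pright,
    int64_t target)
{
    while (pleft <= pright)
    {
        int64_t pmid = pleft + (pright - pleft) / 2 ;
        if (Idx [pmid] < target)
        {
            pleft = pmid + 1 ;      /* Idx [pleft..pmid] are all too small */
        }
        else
        {
            pright = pmid - 1 ;     /* Idx [pmid] is a candidate; keep it */
        }
    }
    return (pleft) ;
}

int main (void)
{
    const int64_t Idx [6] = { 2, 3, 5, 8, 13, 21 } ;
    /* the first position with Idx [p] >= 6 is p = 3 */
    return (skip_ahead (Idx, 0, 5, 6) == 3) ? 0 : 1 ;
}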
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int64.c new file mode 100644 index 0000000000..10b8eaf9b4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_int64 +// A'*B function: GB_AdotB__plus_islt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
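/* A minimal sketch of the Flag state machine the masked outer-product branch
   uses for one column j, matching the convention visible in the code below:
   Flag [i] == 0 means Mask(i,j) is not present, so the term is dropped;
   Flag [i] > 0 means the Mask allows C(i,j) but no term has been accumulated
   yet; Flag [i] < 0 means w [i] already holds a partial sum.  The name
   accumulate_masked_term is illustrative only. */

#include <stdint.h>

static void accumulate_masked_term
(
    int8_t *Flag,       /* per-row mask/seen flags for the current column */
    int64_t *w,         /* dense workspace holding partial sums */
    int64_t i,          /* row index of the term */
    int64_t t           /* term to accumulate, here t = (aik < bkj) */
)
{
    int8_t flag = Flag [i] ;
    if (flag == 0) return ;     /* Mask(i,j) not present: drop the term */
    if (flag > 0)
    {
        Flag [i] = -1 ;         /* first term for C(i,j): start the sum */
        w [i] = t ;
    }
    else
    {
        w [i] += t ;            /* C(i,j) seen before: accumulate */
    }
}

int main (void)
{
    int8_t  Flag [3] = { 0, 1, 1 } ;   /* Mask(:,j) has entries in rows 1 and 2 */
    int64_t w    [3] = { 0, 0, 0 } ;
    accumulate_masked_term (Flag, w, 0, 1) ;   /* dropped: row 0 not in the Mask */
    accumulate_masked_term (Flag, w, 1, 1) ;   /* starts w [1] = 1 */
    accumulate_masked_term (Flag, w, 1, 1) ;   /* accumulates w [1] = 2 */
    return (w [1] == 2 && w [0] == 0) ? 0 : 1 ;
}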
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int8.c new file mode 100644 index 0000000000..47b1f26e1e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_int8 +// A'*B function: GB_AdotB__plus_islt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint16.c new file mode 100644 index 0000000000..f7d7ad97c1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_uint16 +// A'*B function: GB_AdotB__plus_islt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
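/* A minimal sketch of the zombie encoding used by the WITH_ZOMBIES branch,
   assuming FLIP is the usual self-inverse mapping on row indices, for
   example -(i)-2 as elsewhere in GraphBLAS; a zombie is an entry kept in the
   pattern of C but marked dead so a later assembly step can prune it.  The
   EXAMPLE_* names are illustrative only. */

#include <stdint.h>
#include <assert.h>

#define EXAMPLE_FLIP(i)       (-(i)-2)      /* self-inverse: FLIP(FLIP(i)) == i */
#define EXAMPLE_IS_ZOMBIE(i)  ((i) < 0)     /* live row indices are >= 0 */

int main (void)
{
    int64_t i = 42 ;
    int64_t z = EXAMPLE_FLIP (i) ;          /* mark row 42 as a zombie: z = -44 */
    assert (EXAMPLE_IS_ZOMBIE (z)) ;
    assert (EXAMPLE_FLIP (z) == i) ;        /* flipping again recovers row 42 */
    return (0) ;
}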
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint32.c new file mode 100644 index 0000000000..7f7aefe1e0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_uint32 +// A'*B function: GB_AdotB__plus_islt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint64.c new file mode 100644 index 0000000000..509b3774d7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_uint64 +// A'*B function: GB_AdotB__plus_islt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint8.c new file mode 100644 index 0000000000..74133eaf89 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_islt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_islt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_islt_uint8 +// A'*B function: GB_AdotB__plus_islt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp32.c new file mode 100644 index 0000000000..a770d91db1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_fp32 +// A'*B function: GB_AdotB__plus_isne_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp64.c new file mode 100644 index 0000000000..18f4bdcc4f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_fp64 +// A'*B function: GB_AdotB__plus_isne_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
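The semiring described in the header above multiplies with the "is not equal" operator (which yields 0 or 1 in the operand type) and adds with PLUS, so each C(i,j) counts how many k have A(i,k) != B(k,j) among the entries actually present. A minimal dense reference sketch of that semantics, using hypothetical column-major arrays (illustrative only, not generated kernel code):

    // Dense reference semantics of PLUS_ISNE over fp64 (column-major arrays):
    // C(i,j) = sum over k of (A(i,k) != B(k,j)), with 0 as the additive identity.
    void plus_isne_fp64_reference (double *C, const double *A, const double *B,
        int m, int s, int n)        // A is m-by-s, B is s-by-n, C is m-by-n
    {
        for (int j = 0 ; j < n ; j++)
        {
            for (int i = 0 ; i < m ; i++)
            {
                double cij = 0 ;                                // PLUS identity
                for (int k = 0 ; k < s ; k++)
                {
                    double t = (A [i + k*m] != B [k + j*s]) ;   // ISNE multiply
                    cij += t ;                                  // PLUS add
                }
                C [i + j*m] = cij ;
            }
        }
    }

The real kernels below compute the same result but only over the sparse pattern of A, B, and C.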
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int16.c new file mode 100644 index 0000000000..6972813202 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_int16 +// A'*B function: GB_AdotB__plus_isne_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
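A generated kernel such as this one is not called by user code directly; it is selected inside GrB_mxm when the requested built-in semiring matches. A sketch of such a call is below, assuming the predefined semiring object is named GxB_PLUS_ISNE_INT16 (that identifier and the wrapper function are assumptions for illustration, not taken from this patch):

    #include "GraphBLAS.h"

    // Hypothetical driver: C = A*B over the PLUS_ISNE semiring on int16 matrices.
    // When GBCOMPACT is not defined, GrB_mxm can dispatch this to
    // GB_AxB__plus_isne_int16 or GB_AdotB__plus_isne_int16.
    GrB_Info count_differences (GrB_Matrix *C, GrB_Matrix A, GrB_Matrix B)
    {
        GrB_Index m, n ;
        GrB_Matrix_nrows (&m, A) ;
        GrB_Matrix_ncols (&n, B) ;
        GrB_Matrix_new (C, GrB_INT16, m, n) ;
        // no mask, no accumulator, default descriptor; semiring name is assumed
        return (GrB_mxm (*C, NULL, NULL, GxB_PLUS_ISNE_INT16, A, B, NULL)) ;
    }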
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int32.c new file mode 100644 index 0000000000..8995900ce0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_int32 +// A'*B function: GB_AdotB__plus_isne_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
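The unmasked branch that appears in every GB_AxB__* function in these files is a saxpy-style (Gustavson) numeric phase over a pattern already computed by GB_AxB_symbolic: clear the dense workspace w on C(:,j)'s pattern, accumulate the ISNE products column by column, then gather the packed values of C(:,j). A stripped-down sketch of that loop structure with this file's int32_t types (the function name is illustrative):

    #include <stdint.h>

    // Sketch of the unmasked numeric phase: the pattern Cp/Ci has already been
    // computed symbolically, and w is a dense int32_t workspace of size C->nrows.
    static void axb_numeric_sketch
    (
        int32_t *w,                                          // dense workspace
        int32_t *Cx, const int64_t *Cp, const int64_t *Ci,   // C pattern + values
        const int64_t *Ap, const int64_t *Ai, const int32_t *Ax,
        const int64_t *Bp, const int64_t *Bi, const int32_t *Bx,
        int64_t n                                            // # columns of B and C
    )
    {
        for (int64_t j = 0 ; j < n ; j++)
        {
            // clear w only on the pattern of C(:,j)
            for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = 0 ;
            // w(i) += (A(i,k) != B(k,j)) for each entry B(k,j)
            for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
            {
                int64_t k = Bi [p] ;
                int32_t bkj = Bx [p] ;
                for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
                {
                    w [Ai [pa]] += (int32_t) (Ax [pa] != bkj) ;
                }
            }
            // gather w back into the packed values of C(:,j)
            for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;
        }
    }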
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int64.c new file mode 100644 index 0000000000..e90e470adb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_int64 +// A'*B function: GB_AdotB__plus_isne_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
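The companion GB_AdotB__* functions compute each C(i,j) as a sparse dot product by intersecting the row indices of A(:,i) and B(:,j); when the two columns have comparable density, the MERGE macro is driven by a plain two-pointer merge. A self-contained sketch of that case for this file's int64_t type (the function name is illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    // Sketch of the "about the same sparsity" case of cij = A(:,i)' * B(:,j)
    // under the PLUS_ISNE semiring.  Returns true if any index matched.
    static bool dot_isne_merge_sketch
    (
        int64_t *cij_out,
        const int64_t *Ai, const int64_t *Ax_int, int64_t pa, int64_t pa_end,
        const int64_t *Bi, const int64_t *Bx_int, int64_t pb, int64_t pb_end
    )
    {
        bool cij_exists = false ;
        int64_t cij = 0 ;
        while (pa < pa_end && pb < pb_end)
        {
            int64_t ia = Ai [pa], ib = Bi [pb] ;
            if      (ia < ib) pa++ ;            // A(ia,i) has no match in B(:,j)
            else if (ib < ia) pb++ ;            // B(ib,j) has no match in A(:,i)
            else
            {
                // row indices match: apply the multiply, then the add
                cij += (int64_t) (Ax_int [pa++] != Bx_int [pb++]) ;
                cij_exists = true ;
            }
        }
        if (cij_exists) *cij_out = cij ;
        return (cij_exists) ;
    }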
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int8.c new file mode 100644 index 0000000000..a8d64ada02 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_int8 +// A'*B function: GB_AdotB__plus_isne_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
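When one column has more than 32 times the entries of the other, the dot-product kernels skip ahead through the denser column with GB_BINARY_TRIM_SEARCH instead of stepping one entry at a time. A sketch of that pointer-advance step, with an ordinary lower-bound binary search standing in for the macro (its exact interface is inferred from how it is used in these kernels):

    #include <stdint.h>

    // Advance past all entries Ai [pa..] that are smaller than target,
    // mimicking the effect of GB_BINARY_TRIM_SEARCH in the skewed-density case.
    // On entry Ai [pa] < target is already known; pa_end is one past the last entry.
    static int64_t advance_past (const int64_t *Ai, int64_t pa, int64_t pa_end,
                                 int64_t target)
    {
        int64_t pleft = pa + 1, pright = pa_end ;
        while (pleft < pright)
        {
            int64_t pmid = pleft + (pright - pleft) / 2 ;
            if (Ai [pmid] < target) pleft = pmid + 1 ;
            else                    pright = pmid ;
        }
        return (pleft) ;        // first position with Ai [pleft] >= target
    }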
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint16.c new file mode 100644 index 0000000000..caf95a2df0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_uint16 +// A'*B function: GB_AdotB__plus_isne_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint32.c new file mode 100644 index 0000000000..ce479c90b9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_uint32 +// A'*B function: GB_AdotB__plus_isne_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint64.c new file mode 100644 index 0000000000..563a99188f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_uint64 +// A'*B function: GB_AdotB__plus_isne_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint8.c new file mode 100644 index 0000000000..4ab2f3ea4c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_isne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_isne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_isne_uint8 +// A'*B function: GB_AdotB__plus_isne_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_fp32.c new file mode 100644 index 0000000000..4976f241f1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_fp32 +// A'*B function: GB_AdotB__plus_land_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_fp64.c new file mode 100644 index 0000000000..549030ecdc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_fp64 +// A'*B function: GB_AdotB__plus_land_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
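+    // Illustrative aside, not part of the generated template: under this
+    // PLUS_LAND semiring the "multiply" t = ((aik != 0) && (bkj != 0)) is
+    // always 0 or 1 and the "add" is an ordinary +=, so w [i] accumulates the
+    // number of indices k for which A(i,k) and B(k,j) are both nonzero.  For
+    // example, if A(i,:) = [0 2 5 0] and B(:,j) = [3 7 0 1], the two patterns
+    // overlap only at k = 1, so C(i,j) = 1.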
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int16.c new file mode 100644 index 0000000000..3e82267d5e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_int16 +// A'*B function: GB_AdotB__plus_land_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
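+        // Illustrative aside, not part of the generated template: the gather
+        // above marked any entry that is in the Mask but not in A*B as a
+        // zombie, storing its row index as FLIP (i) and counting it in
+        // C->nzombies.  Placing C in the global matrix queue here lets those
+        // zombies be pruned later, when pending work on C is finished.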
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int32.c new file mode 100644 index 0000000000..8c30c42d3f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_int32 +// A'*B function: GB_AdotB__plus_land_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int64.c new file mode 100644 index 0000000000..c7e460b252 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_int64 +// A'*B function: GB_AdotB__plus_land_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int8.c new file mode 100644 index 0000000000..539fd91b0c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_int8 +// A'*B function: GB_AdotB__plus_land_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
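+ // The outer-product phase below is a Gustavson-style column sweep: for each
+ // column j, every entry B(k,j) scatters A(:,k)*B(k,j) into the dense
+ // workspace w, and w is then gathered back into C(:,j).  For example, if
+ // A(:,k) has nonzero entries in rows 0 and 2 and B(k,j) is nonzero, the
+ // unmasked sweep does w [0] += 1 and w [2] += 1, since the PLUS_LAND
+ // multiply is t = (aik != 0) && (bkj != 0).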
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint16.c new file mode 100644 index 0000000000..10e6bc87ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_uint16 +// A'*B function: GB_AdotB__plus_land_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
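+ // Flag encoding used by the masked phase: Flag [i] == 0 means Mask(i,j) is
+ // false or not in the Mask pattern, Flag [i] > 0 means Mask(i,j) is true but
+ // C(i,j) has not been seen yet, and Flag [i] < 0 means C(i,j) has already
+ // been accumulated into w [i].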
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint32.c new file mode 100644 index 0000000000..36bd248cba --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_uint32 +// A'*B function: GB_AdotB__plus_land_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
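+ // When no Mask is given, the pattern of C has already been computed by
+ // GB_AxB_symbolic, so the numeric phase below only clears w on the pattern
+ // of C(:,j), accumulates into it, and gathers the values back into Cx.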
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint64.c new file mode 100644 index 0000000000..967735cb7d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_uint64 +// A'*B function: GB_AdotB__plus_land_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint8.c new file mode 100644 index 0000000000..2809f5760d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_land_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_land_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_land_uint8 +// A'*B function: GB_AdotB__plus_land_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
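+ // A zombie is an entry kept in the pattern of C but marked for deletion:
+ // its row index is stored as FLIP (i) and counted in C->nzombies.  Placing
+ // C in the queue records that it has this pending work.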
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp32.c new file mode 100644 index 0000000000..c1c285b33e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_fp32 +// A'*B function: GB_AdotB__plus_lor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
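+    // w is a thread-local scatter/gather workspace (GB_thread_local.Work)
+    // with one entry per row of C.  Each column C(:,j) is accumulated into
+    // w and then gathered back into the compressed-column form of C.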
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp 
[n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp64.c new file mode 100644 index 0000000000..035abf8aea --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_fp64 +// A'*B function: GB_AdotB__plus_lor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
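+    // In the masked phase below, the Flag workspace encodes the state of
+    // each row i within the current column j:  Flag [i] == 0 means
+    // Mask (i,j) is not set (the entry is skipped), Flag [i] > 0 means
+    // Mask (i,j) is set but C(i,j) has not been computed yet, and
+    // Flag [i] == -1 means w [i] already holds a partial result for C(i,j).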
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int16.c new file mode 100644 index 0000000000..acd6c06ddc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_int16 +// A'*B function: GB_AdotB__plus_lor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
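+    // Two compile-time variants of the masked phase appear below:  with
+    // WITH_ZOMBIES defined, C->p is copied from the Mask so C takes the
+    // Mask's full pattern and entries not computed by A*B become zombies;
+    // otherwise the pattern of C is compacted on the fly via cnz and only
+    // live entries are kept.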
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int32.c new file mode 100644 index 0000000000..3b372ec8f2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_int32 +// A'*B function: GB_AdotB__plus_lor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
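+    // The masked phase below prunes work early:  a column j is skipped when
+    // Mask (:,j) is empty, and A(:,k) is skipped when it is empty or when
+    // its row range [alo,ahi] does not overlap the Mask's row range
+    // [mlo,mhi], since none of its entries could survive the Mask.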
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int64.c new file mode 100644 index 0000000000..86972bd250 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_int64 +// A'*B function: GB_AdotB__plus_lor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
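+    // After each column is computed, C(:,j) is gathered from the pattern of
+    // Mask (:,j):  rows with Flag [i] == -1 copy their value from w [i], and
+    // Flag is reset to zero so the same workspace can be reused for the
+    // next column.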
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int8.c new file mode 100644 index 0000000000..8fc8bd6caa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_int8 +// A'*B function: GB_AdotB__plus_lor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
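// The PLUS_LOR semiring that this kernel hard-codes is small enough to show
// in isolation.  The sketch below is standalone and illustrative only (the
// function name and the main are not part of GraphBLAS): the "multiply" is a
// logical OR of the two operands' nonzero tests, the "add" is ordinary
// integer addition with identity 0, so a dot product under this semiring
// counts the positions at which either operand is nonzero.

#include <stdint.h>
#include <stdio.h>

static int8_t plus_lor_dot_int8 (const int8_t *a, const int8_t *b, int64_t n)
{
    int8_t cij = 0 ;                                // additive identity of PLUS
    for (int64_t k = 0 ; k < n ; k++)
    {
        int8_t t = (a [k] != 0) || (b [k] != 0) ;   // multiply: LOR
        cij += t ;                                  // add: PLUS
    }
    return (cij) ;
}

int main (void)
{
    int8_t a [4] = { 0, 2, 0, 5 } ;
    int8_t b [4] = { 1, 0, 0, 7 } ;
    // positions 0, 1, and 3 hold a nonzero in a or b, so this prints 3
    printf ("%d\n", plus_lor_dot_int8 (a, b, 4)) ;
    return (0) ;
}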
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint16.c new file mode 100644 index 0000000000..bc90d78864 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_uint16 +// A'*B function: GB_AdotB__plus_lor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
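// Every loop in these kernels reads A, B, and C through the same
// compressed-sparse-column (CSC) layout: column j of A occupies positions
// Ap [j] ... Ap [j+1]-1 of the row-index array Ai and of the value array Ax,
// with row indices sorted within each column.  The 3-by-3 matrix and the
// names below are made up purely to illustrate that convention; only the
// layout matches the kernel.

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // A = [ 4  0  1 ]
    //     [ 0  0  2 ]
    //     [ 5  0  3 ]
    int64_t  Ap [4] = { 0, 2, 2, 5 } ;      // column pointers (n+1 of them)
    int64_t  Ai [5] = { 0, 2, 0, 1, 2 } ;   // row indices, sorted per column
    uint16_t Ax [5] = { 4, 5, 1, 2, 3 } ;   // values, in the same order as Ai
    for (int64_t j = 0 ; j < 3 ; j++)
    {
        for (int64_t p = Ap [j] ; p < Ap [j+1] ; p++)
        {
            printf ("A(%ld,%ld) = %u\n", (long) Ai [p], (long) j,
                (unsigned) Ax [p]) ;
        }
    }
    return (0) ;
}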
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint32.c new file mode 100644 index 0000000000..fe13d4a116 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_uint32 +// A'*B function: GB_AdotB__plus_lor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
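// A minimal, standalone sketch of the mask scatter/gather idea used in the
// Mask != NULL branch below.  The mask's row indices for one column are
// scattered into a byte array Flag (+1 = allowed by the mask, -1 = a value
// was computed); after the numeric work, entries still at +1 were in the
// mask but never produced by A*B.  In the real kernel those become
// "zombies" (row index stored as FLIP (i), counted in C->nzombies); here
// they are simply reported.  All names are illustrative, not GraphBLAS
// internals.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main (void)
{
    int8_t Flag [8] ;
    memset (Flag, 0, sizeof (Flag)) ;
    int64_t maski [3] = { 1, 4, 6 } ;       // Mask(:,j): rows 1, 4, 6 allowed
    int64_t comp  [2] = { 4, 7 } ;          // rows produced by A*B(:,j)
    // scatter the mask
    for (int k = 0 ; k < 3 ; k++) Flag [maski [k]] = 1 ;
    // "numeric" phase: keep only the results the mask allows
    for (int k = 0 ; k < 2 ; k++)
    {
        int64_t i = comp [k] ;
        if (Flag [i] == 0) continue ;       // row 7 is masked out, dropped
        Flag [i] = -1 ;                     // row 4 becomes a live entry
    }
    // gather over the mask pattern and clear Flag for the next column
    for (int k = 0 ; k < 3 ; k++)
    {
        int64_t i = maski [k] ;
        printf ("row %ld: %s\n", (long) i,
            (Flag [i] < 0) ? "live entry" : "zombie (in Mask, not in A*B)") ;
        Flag [i] = 0 ;
    }
    return (0) ;
}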
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint64.c new file mode 100644 index 0000000000..3da599525e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_uint64 +// A'*B function: GB_AdotB__plus_lor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
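// A standalone sketch of the merge used by GB_AdotB below in the common case
// where A(:,i) and B(:,j) have similar sparsity: both index lists are sorted,
// so a two-pointer merge finds the rows k present in both, and each match
// contributes t = (aki != 0) || (bkj != 0) to cij.  The GB_BINARY_TRIM_SEARCH
// branches in the kernel only accelerate this same merge when one list is far
// longer than the other.  The names below are illustrative, not GraphBLAS
// internals.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static bool dot_plus_lor_uint64
(
    const int64_t *Ai, const uint64_t *Ax, int64_t anz,    // A(:,i), sorted
    const int64_t *Bi, const uint64_t *Bx, int64_t bnz,    // B(:,j), sorted
    uint64_t *cij                                           // result, if any
)
{
    bool cij_exists = false ;
    int64_t pa = 0, pb = 0 ;
    while (pa < anz && pb < bnz)
    {
        if      (Ai [pa] < Bi [pb]) pa++ ;
        else if (Bi [pb] < Ai [pa]) pb++ ;
        else
        {
            // row k = Ai [pa] = Bi [pb] appears in both patterns
            uint64_t t = (Ax [pa] != 0) || (Bx [pb] != 0) ;
            *cij = cij_exists ? (*cij + t) : t ;
            cij_exists = true ;
            pa++ ; pb++ ;
        }
    }
    return (cij_exists) ;       // false: C(i,j) is not in the pattern
}

int main (void)
{
    int64_t  Ai [3] = { 0, 3, 5 } ;   uint64_t Ax [3] = { 2, 0, 7 } ;
    int64_t  Bi [2] = { 3, 5 } ;      uint64_t Bx [2] = { 9, 0 } ;
    uint64_t cij ;
    if (dot_plus_lor_uint64 (Ai, Ax, 3, Bi, Bx, 2, &cij))
    {
        printf ("cij = %llu\n", (unsigned long long) cij) ;     // prints 2
    }
    return (0) ;
}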
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint8.c new file mode 100644 index 0000000000..4db5a712f7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lor_uint8 +// A'*B function: GB_AdotB__plus_lor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
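
Aside (not part of the generated file): the semiring used throughout GB_AxB__plus_lor_uint8 is PLUS as the additive operator and LOR as the multiplier, both over uint8_t, so each multiply contributes 1 when at least one of A(k,i) and B(k,j) is nonzero, and those contributions are summed. A minimal standalone sketch of just the scalar operators, with made-up values (lor_mult and the vectors below are illustrative only, not part of the library):

    #include <stdint.h>
    #include <stdio.h>

    // multiplier of the PLUS_LOR_UINT8 semiring: logical OR of the
    // "is nonzero" tests, produced as a uint8_t (0 or 1)
    static uint8_t lor_mult (uint8_t aik, uint8_t bkj)
    {
        return (uint8_t) ((aik != 0) || (bkj != 0)) ;
    }

    int main (void)
    {
        // one dot product cij = sum over k of lor_mult (A(k,i), B(k,j)),
        // using made-up dense vectors of length 4
        uint8_t a [4] = { 0, 3, 0, 7 } ;
        uint8_t b [4] = { 5, 0, 0, 2 } ;
        uint8_t cij = 0 ;                       // identity of PLUS is 0
        for (int k = 0 ; k < 4 ; k++)
        {
            cij += lor_mult (a [k], b [k]) ;    // the "add" is cij += t
        }
        printf ("cij = %d\n", cij) ;            // prints 3
        return 0 ;
    }
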
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
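
Aside (not part of the generated file): in the masked variant above, the pattern of Mask(:,j) is scattered into the byte workspace Flag, contributions for rows outside the mask are skipped, the first contribution to a row sets Flag[i] = -1 and initializes w[i], and later contributions accumulate into w[i]. The gather pass then walks Mask(:,j): rows with Flag[i] < 0 become live entries of C, while mask positions that received no contribution are kept as zombies, their row index stored flipped (Ci[p] = FLIP(i)) so later phases can recognize and prune them. A small self-contained sketch of the scatter/accumulate/gather cycle for one column, with hand-built index arrays (the real kernels work on the GrB_Matrix data structures, not plain arrays):

    #include <stdint.h>
    #include <stdio.h>

    #define N 6

    int main (void)
    {
        // hypothetical pattern of Mask(:,j): rows 1, 3, 4
        int64_t maski [3] = { 1, 3, 4 } ;
        int64_t masknz = 3 ;

        // scatter the mask into a Flag workspace (1 = allowed, untouched)
        int8_t  flag [N] = { 0 } ;
        uint8_t w    [N] = { 0 } ;
        for (int64_t p = 0 ; p < masknz ; p++) flag [maski [p]] = 1 ;

        // pretend the saxpy loop produced contributions t = 1 for
        // rows 3 (twice) and 5
        int64_t hits [3] = { 3, 5, 3 } ;
        for (int p = 0 ; p < 3 ; p++)
        {
            int64_t i = hits [p] ;
            if (flag [i] == 0) continue ;       // row 5 is masked out
            if (flag [i] > 0) { flag [i] = -1 ; w [i] = 1 ; }  // first hit
            else              { w [i] += 1 ; }                 // accumulate
        }

        // gather along the mask pattern: flag < 0 means a live entry,
        // flag > 0 means the mask position got no contribution ("zombie")
        for (int64_t p = 0 ; p < masknz ; p++)
        {
            int64_t i = maski [p] ;
            printf ("row %ld: %s, value %d\n", (long) i,
                flag [i] < 0 ? "live" : "zombie", w [i]) ;
            flag [i] = 0 ;                      // clear for the next column
        }
        return 0 ;
    }
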
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp32.c new file mode 100644 index 0000000000..7138b485c8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_fp32 +// A'*B function: GB_AdotB__plus_lxor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
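
Aside (not part of the generated file): only the scalar type and the multiply/add pair change from one generated kernel to the next. In GB_AxB__plus_lxor_fp32 the multiplier is LXOR over float, so a term contributes 1 exactly when one, but not both, of A(k,i) and B(k,j) is nonzero. A tiny standalone sketch of the scalar operators with made-up values (lxor_mult is illustrative only):

    #include <stdio.h>

    // multiplier of the PLUS_LXOR_FP32 semiring: 1.0f when exactly one
    // of the two operands is nonzero, 0.0f otherwise
    static float lxor_mult (float aik, float bkj)
    {
        return (float) ((aik != 0) != (bkj != 0)) ;
    }

    int main (void)
    {
        float a [4] = { 0.f, 2.5f, 0.f, 1.f } ;
        float b [4] = { 3.f, 4.f , 0.f, 0.f } ;
        float cij = 0 ;                              // identity of PLUS
        for (int k = 0 ; k < 4 ; k++) cij += lxor_mult (a [k], b [k]) ;
        printf ("cij = %g\n", cij) ;                 // prints 2
        return 0 ;
    }
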
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp64.c new file mode 100644 index 0000000000..0706480fdf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_fp64 +// A'*B function: GB_AdotB__plus_lxor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
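
Aside (not part of the generated file): in the GB_AdotB__* kernels, each C(i,j) is a dot product of two sparse columns A(:,i) and B(:,j), both stored as sorted lists of (row index, value). When the two lists have comparable length they are merged like a sorted-set intersection, and only rows present in both contribute a term; if no row is shared, C(i,j) is left out of the pattern entirely. A minimal standalone sketch of that merge under the plus_lxor semiring, with hand-built vectors:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    int main (void)
    {
        // A(:,i): entries at rows 0, 2, 5 ; B(:,j): entries at rows 2, 3, 5
        int64_t ai [3] = { 0, 2, 5 } ;  double ax [3] = { 1.0, 0.0, 4.0 } ;
        int64_t bi [3] = { 2, 3, 5 } ;  double bx [3] = { 7.0, 8.0, 9.0 } ;

        double cij = 0 ;
        bool cij_exists = false ;        // does C(i,j) appear at all?
        int64_t pa = 0, pb = 0 ;
        while (pa < 3 && pb < 3)
        {
            if      (ai [pa] < bi [pb]) pa++ ;   // A entry has no partner
            else if (bi [pb] < ai [pa]) pb++ ;   // B entry has no partner
            else
            {
                // same row k in both vectors: apply the multiplier and add
                double t = (ax [pa] != 0) != (bx [pb] != 0) ;
                cij = cij_exists ? (cij + t) : t ;
                cij_exists = true ;
                pa++ ; pb++ ;
            }
        }
        if (cij_exists) printf ("C(i,j) = %g\n", cij) ;   // prints 1
        else            printf ("C(i,j) not in the pattern\n") ;
        return 0 ;
    }
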
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int16.c new file mode 100644 index 0000000000..6500a0abd1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_int16 +// A'*B function: GB_AdotB__plus_lxor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
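
Aside (not part of the generated file): when one vector of the dot product has more than 32 times the entries of the other, the kernels above do not step through the long list one entry at a time; they jump ahead with GB_BINARY_TRIM_SEARCH (defined in GB_AxB_methods.h) to the first position whose row index is at least the row currently sought in the short list. A sketch of that idea as an ordinary lower-bound binary search (the library's macro is assumed to behave like this, but its exact interface and trimming details may differ):

    #include <stdint.h>
    #include <stdio.h>

    // return the smallest p in [lo,hi) with list [p] >= target,
    // or hi if every entry is smaller (a plain lower-bound search)
    static int64_t lower_bound (int64_t target, const int64_t *list,
                                int64_t lo, int64_t hi)
    {
        while (lo < hi)
        {
            int64_t mid = lo + (hi - lo) / 2 ;
            if (list [mid] < target) lo = mid + 1 ; else hi = mid ;
        }
        return lo ;
    }

    int main (void)
    {
        // the "long" vector's row indices; catch up to row 70, which is
        // known to be present in this made-up list
        int64_t ai [8] = { 3, 9, 20, 31, 44, 58, 70, 91 } ;
        int64_t pa = lower_bound (70, ai, 0, 8) ;
        printf ("skip to position %ld (row %ld)\n", (long) pa, (long) ai [pa]) ;
        return 0 ;
    }
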
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int32.c new file mode 100644 index 0000000000..e529153c70 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_int32 +// A'*B function: GB_AdotB__plus_lxor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
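
Aside (not part of the generated file): when no Mask is given, the pattern of C has already been produced by GB_AxB_symbolic, so the kernels above only fill in values: for each column j, the workspace w is cleared at the positions listed in Ci, contributions w[i] += t are accumulated over the entries of B(:,j) and the matching columns of A, and w is gathered back into Cx along the same pattern. A small standalone sketch of that clear/accumulate/gather cycle for one column, with hand-built arrays and precomputed contribution values standing in for the saxpy loop:

    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
        // one column j of C with a precomputed pattern: rows 0, 2, 4
        int64_t ci [3] = { 0, 2, 4 } ;
        int32_t cx [3] ;
        int32_t w [5] ;                       // dense workspace, length nrows

        // clear w only at the positions that can become nonzero
        for (int p = 0 ; p < 3 ; p++) w [ci [p]] = 0 ;

        // pretend the saxpy loop produced these (row, t) contributions,
        // where t = (aik != 0) != (bkj != 0) as in the kernels above
        int64_t rows  [4] = { 2, 0, 2, 4 } ;
        int32_t tvals [4] = { 1, 1, 1, 0 } ;
        for (int p = 0 ; p < 4 ; p++) w [rows [p]] += tvals [p] ;

        // gather the workspace back into the column of C
        for (int p = 0 ; p < 3 ; p++)
        {
            cx [p] = w [ci [p]] ;
            printf ("C(%ld,j) = %d\n", (long) ci [p], cx [p]) ;
        }
        return 0 ;
    }
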
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int64.c new file mode 100644 index 0000000000..2818aebda3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_int64 +// A'*B function: GB_AdotB__plus_lxor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
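+
+    // Worked example of this semiring's scalar update (added for
+    // illustration; derived from the Multiply and Add operators listed
+    // above).  With stored values aik = 7 and bkj = 0:
+    //
+    //      t   = ((7 != 0) != (0 != 0)) = (true != false) = 1
+    //      cij = cij + t
+    //
+    // so each multiply contributes 1 when exactly one of the two stored
+    // values is nonzero, and 0 otherwise; the PLUS monoid then sums these
+    // contributions into C(i,j).
+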
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int8.c new file mode 100644 index 0000000000..f75d70e8fa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_int8 +// A'*B function: GB_AdotB__plus_lxor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
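+
+    // Note on the masked variant used below (illustrative summary): when a
+    // Mask is given, C takes the pattern of the Mask.  An entry i that
+    // appears in Mask(:,j) but receives no contribution from A*B(:,j) is
+    // kept as a "zombie": its value is set to 0, its row index is stored
+    // as FLIP (i), and C->nzombies is incremented.  If any zombies were
+    // created, C is placed in the queue via GB_queue_insert.
+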
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint16.c new file mode 100644 index 0000000000..7fa7320ce6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_uint16 +// A'*B function: GB_AdotB__plus_lxor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
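+
+    // Note on the Flag workspace used in the masked loop below (added for
+    // clarity): Flag [i] == 0 means Mask(i,j) is not set for the current
+    // column j; Flag [i] > 0 means Mask(i,j) is set but C(i,j) has not yet
+    // been computed; Flag [i] == -1 means C(i,j) has been started and its
+    // running value is held in w [i].  Flag is reset to zero as C(:,j) is
+    // gathered, so the workspace is clean for the next column.
+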
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint32.c new file mode 100644 index 0000000000..0459198926 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_uint32 +// A'*B function: GB_AdotB__plus_lxor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
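+
+    // Note on the dot-product variant in this file (illustrative): the
+    // companion kernel GB_AdotB__plus_lxor_uint32 below computes each
+    // cij = A(:,i)'*B(:,j) by merging two sparse columns.  It picks a
+    // strategy per pair: direct indexing when either column is dense
+    // (ainz == nrows or bjnz == nrows), a binary trim search
+    // (GB_BINARY_TRIM_SEARCH) to skip runs of entries when one column has
+    // more than 32 times the entries of the other, and an ordinary
+    // two-pointer merge when the two columns have similar sparsity.
+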
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint64.c new file mode 100644 index 0000000000..e2850f8e3a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_uint64 +// A'*B function: GB_AdotB__plus_lxor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
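+
+    // Note on the unmasked case below (illustrative): when Mask is NULL,
+    // the pattern of C (Cp and Ci) has already been computed by
+    // GB_AxB_symbolic, so the loop only fills in the numerical values:
+    // w is cleared over the pattern of C(:,j), the products are
+    // accumulated into w, and w is then gathered back into Cx.
+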
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint8.c new file mode 100644 index 0000000000..314e089361 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_lxor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_lxor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_lxor_uint8 +// A'*B function: GB_AdotB__plus_lxor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
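// [Editor's note: an illustrative aside, not part of the generated file or of this
// patch.  It sketches the saxpy-style pass used by the unmasked outer product below:
// w is cleared at the pattern of C(:,j), each entry B(k,j) scatters A(:,k) into w
// under the semiring, and w is then gathered back into Cx.  The compressed-column
// toy data are made up for the sketch; explicit zeros are stored on purpose so the
// logical-xor "multiply" produces a mix of 0 and 1 results.]

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // A is 3-by-3 in compressed-column form: Ap (column pointers), Ai (rows), Ax (values)
    int64_t Ap [4] = { 0, 2, 3, 5 } ;
    int64_t Ai [5] = { 0, 2, 1, 0, 2 } ;
    uint8_t Ax [5] = { 2, 0, 5, 0, 3 } ;

    // one column B(:,j) with entries in rows 0 and 2
    int64_t Bi [2] = { 0, 2 } ;
    uint8_t Bx [2] = { 4, 1 } ;

    // pattern of C(:,j), as a prior symbolic pass would produce: rows 0 and 2
    int64_t Ci [2] = { 0, 2 } ;
    uint8_t Cx [2] ;

    uint8_t w [3] ;
    for (int p = 0 ; p < 2 ; p++) w [Ci [p]] = 0 ;          // clear w at the pattern
    for (int p = 0 ; p < 2 ; p++)                           // for each entry B(k,j)
    {
        int64_t k = Bi [p] ;
        uint8_t bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)    // w += A(:,k) "times" bkj
        {
            w [Ai [pa]] += (uint8_t) ((Ax [pa] != 0) != (bkj != 0)) ;
        }
    }
    for (int p = 0 ; p < 2 ; p++) Cx [p] = w [Ci [p]] ;     // gather C(:,j)
    printf ("C(0,j) = %d, C(2,j) = %d\n", Cx [0], Cx [1]) ; // prints 1 and 1
    return 0 ;
}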
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_fp32.c new file mode 100644 index 0000000000..4f081249b0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_fp32 +// A'*B function: GB_AdotB__plus_max_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
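// [Editor's note: an illustrative aside, not part of the generated file or of this
// patch.  A minimal scalar sketch of the PLUS_MAX_FP32 semiring named above: the
// "multiply" takes the larger of the two operands and the "add" sums those maxima,
// starting from the identity 0.  C99 fmaxf is used here as a stand-in for the
// library's FMAX macro; the toy data are made up.]

#include <math.h>
#include <stdio.h>

int main (void)
{
    float a [3] = { 1.0f, -2.0f, 4.0f } ;
    float b [3] = { 3.0f,  0.5f, 2.0f } ;
    float cij = 0.0f ;                      // identity of the PLUS monoid
    for (int k = 0 ; k < 3 ; k++)
    {
        float t = fmaxf (a [k], b [k]) ;    // multiply: max of the pair
        cij += t ;                          // add: plus
    }
    printf ("cij = %g\n", cij) ;            // 3 + 0.5 + 4 = 7.5
    return 0 ;
}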
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_max_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_fp64.c new file mode 100644 index 0000000000..e5fc45d151 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_fp64 +// A'*B function: GB_AdotB__plus_max_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
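// [Editor's note: an illustrative aside, not part of the generated file or of this
// patch.  It sketches how the masked pass below uses the Flag workspace: Flag [i] is
// 0 for rows outside Mask(:,j), 1 for mask rows whose C(i,j) has not been produced
// yet, and -1 once w [i] holds a partial sum.  The gather then walks the mask
// pattern; rows still flagged 1 received no contribution from A*B (in the
// WITH_ZOMBIES variant they are kept as zombies instead of being dropped).  The toy
// data are made up for the sketch.]

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // Mask(:,j) has rows {0, 2}; in this toy example A*B contributes only to row 2
    int64_t Maski [2] = { 0, 2 } ;
    int8_t  Flag [3]  = { 0, 0, 0 } ;
    double  w [3] ;

    for (int p = 0 ; p < 2 ; p++) Flag [Maski [p]] = 1 ;    // scatter the mask

    // a single semiring update hitting row 2 (t plays the role of FMAX(aik,bkj))
    int64_t i = 2 ;
    double  t = 5.0 ;
    if (Flag [i] != 0)                                      // skip rows not in the mask
    {
        if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = t ; }   // first contribution
        else              { w [i] += t ; }                  // later contributions
    }

    // gather along the mask pattern; mask rows never touched produce no entry
    for (int p = 0 ; p < 2 ; p++)
    {
        int64_t r = Maski [p] ;
        if (Flag [r] < 0) printf ("C(%d,j) = %g\n", (int) r, w [r]) ;
        else              printf ("C(%d,j) not in A*B (a zombie if the pattern is kept)\n", (int) r) ;
        Flag [r] = 0 ;                                      // restore Flag for the next column
    }
    return 0 ;
}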
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
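// [Editor's note: an illustrative aside, not part of the generated file or of this
// patch.  It sketches the mixed dense/sparse dot-product case used above: when
// A(:,i) holds all nrows entries, A(k,i) can be read directly at Ax [pa + k] for
// each sparse entry B(k,j), with no merging or searching.  C99 fmax stands in for
// the library's FMAX macro, and the toy data are made up.]

#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // A(:,i) is dense over nrows = 5, stored at Ax [pa ... pa+4]
    double  Ax [5] = { 0.5, 2.0, -1.0, 3.0, 4.0 } ;
    int64_t pa = 0 ;

    // B(:,j) is sparse, with entries in rows 1 and 3
    int64_t Bi [2] = { 1, 3 } ;
    double  Bx [2] = { 7.0, 1.0 } ;

    double cij = 0.0 ;
    for (int64_t pb = 0 ; pb < 2 ; pb++)
    {
        int64_t k = Bi [pb] ;
        double  t = fmax (Ax [pa + k], Bx [pb]) ;   // multiply: max of A(k,i) and B(k,j)
        cij += t ;                                  // add: plus
    }
    printf ("C(i,j) = %g\n", cij) ;                 // fmax(2,7) + fmax(3,1) = 10
    return 0 ;
}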
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int16.c new file mode 100644 index 0000000000..f15eeee93c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_int16 +// A'*B function: GB_AdotB__plus_max_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
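// [Editor's note: an illustrative aside, not part of the generated file or of this
// patch.  It sketches the MERGE loop used by the dot-product code below when A(:,i)
// and B(:,j) have similar sparsity: both are sorted (row index, value) lists, a
// two-pointer walk advances whichever index is smaller, and the semiring is applied
// only when the indices match.  IMAX here is the usual integer max, standing in for
// the library macro of the same name; the toy data are made up.]

#include <stdint.h>
#include <stdio.h>

#define IMAX(a,b) (((a) > (b)) ? (a) : (b))

int main (void)
{
    // A(:,i) has entries in rows 1, 4, 7 ;  B(:,j) has entries in rows 4, 5, 7
    int64_t Ai [3] = { 1, 4, 7 } ;  int16_t Ax [3] = { 2, 3, 9 } ;
    int64_t Bi [3] = { 4, 5, 7 } ;  int16_t Bx [3] = { 8, 1, 4 } ;

    int16_t cij = 0 ;
    int     cij_exists = 0 ;
    int64_t pa = 0, pb = 0 ;
    while (pa < 3 && pb < 3)
    {
        if      (Ai [pa] < Bi [pb]) pa++ ;                  // this A entry has no partner
        else if (Bi [pb] < Ai [pa]) pb++ ;                  // this B entry has no partner
        else                                                // matching row index k
        {
            int16_t t = IMAX (Ax [pa], Bx [pb]) ;           // multiply: integer max
            cij = cij_exists ? (int16_t) (cij + t) : t ;    // add: plus (first hit just assigns)
            cij_exists = 1 ;
            pa++ ; pb++ ;
        }
    }
    if (cij_exists) printf ("C(i,j) = %d\n", cij) ;         // IMAX(3,8) + IMAX(9,4) = 17
    return 0 ;
}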
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int32.c new file mode 100644 index 0000000000..af30247a8a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_int32 +// A'*B function: GB_AdotB__plus_max_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int64.c new file mode 100644 index 0000000000..f468559c52 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_int64 +// A'*B function: GB_AdotB__plus_max_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int8.c new file mode 100644 index 0000000000..cefc05c79b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_int8 +// A'*B function: GB_AdotB__plus_max_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
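+// Worked example for the PLUS_MAX kernels in this family (illustrative only;
+// the entry values below are hypothetical, not taken from any test matrix).
+// The dot-product method above computes C(i,j) = A(:,i)'*B(:,j) by summing
+// t = IMAX (A(k,i), B(k,j)) over the rows k present in both sparsity
+// patterns.  For instance, if A(:,i) holds the entries (row 0, value 2) and
+// (row 1, value 5), and B(:,j) holds (row 0, value 3) and (row 2, value 4),
+// then only row k = 0 lies in both patterns, so the MERGE step fires once,
+// cij = IMAX (2,3) = 3, and C(i,j) = 3.  If the two patterns do not
+// intersect, cij_exists remains false and no entry is written to C(i,j).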
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint16.c new file mode 100644 index 0000000000..82d7d6dbdf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_uint16 +// A'*B function: GB_AdotB__plus_max_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint32.c new file mode 100644 index 0000000000..3a1844cad7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_uint32 +// A'*B function: GB_AdotB__plus_max_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint64.c new file mode 100644 index 0000000000..3ad0387a34 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_uint64 +// A'*B function: GB_AdotB__plus_max_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
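+    // note: w is the dense scatter/gather workspace for this column-at-a-time
+    // method; every entry of w used below is written before it is read, so it
+    // is safe to leave it uninitialized here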
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint8.c new file mode 100644 index 0000000000..2f22736fa7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_max_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_max_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_max_uint8 +// A'*B function: GB_AdotB__plus_max_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_fp32.c new file mode 100644 index 0000000000..c117ede5ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_fp32 +// A'*B function: GB_AdotB__plus_min_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (FMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
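+    // note: w holds the current column C(:,j) as a dense vector indexed by
+    // row; only positions allowed by the Mask, or present in the symbolic
+    // pattern of C, are touched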
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_min_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_fp64.c new file mode 100644 index 0000000000..bf3669fd1b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_fp64 +// A'*B function: GB_AdotB__plus_min_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (FMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
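+    // note: in the WITH_ZOMBIES variant below, Mask entries not present in
+    // A*B are kept as zombies (their row indices are FLIPped and C->nzombies
+    // is incremented), and C is placed in the queue if it has any zombies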
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int16.c new file mode 100644 index 0000000000..7cf3e352ef --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_int16 +// A'*B function: GB_AdotB__plus_min_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
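+    // note: in the masked phase below, Flag records the status of each row in
+    // Mask(:,j): zero means not in the mask, positive means in the mask but
+    // C(i,j) not yet seen, negative means C(i,j) has been accumulated in w;
+    // Flag is cleared back to zero as C(:,j) is gathered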
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int32.c new file mode 100644 index 0000000000..dea57be029 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_int32 +// A'*B function: GB_AdotB__plus_min_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
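The comment block above pins down the PLUS_MIN semiring used by this kernel: the "multiply" is t = IMIN(aik,bkj), the "add" is cij += t, and 0 is the additive identity. As a quick numerical illustration (a standalone sketch with made-up data, not a GraphBLAS API call), one entry of C = A*B is the sum over k of min(A(k,i), B(k,j)):

    #include <stdio.h>
    #include <stdint.h>

    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    int main (void)
    {
        // row i of A and column j of B, as dense length-4 vectors
        int32_t a [4] = { 5, 2, 9, 1 } ;
        int32_t b [4] = { 3, 7, 4, 8 } ;
        int32_t cij = 0 ;                       // additive identity of PLUS
        for (int k = 0 ; k < 4 ; k++)
        {
            int32_t t = IMIN (a [k], b [k]) ;   // multiply: min
            cij += t ;                          // add: plus
        }
        printf ("cij = %d\n", cij) ;            // 3 + 2 + 4 + 1 = 10
        return (0) ;
    }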
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int64.c new file mode 100644 index 0000000000..cb05f1d077 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_int64 +// A'*B function: GB_AdotB__plus_min_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
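In the unmasked branch above (the case where GB_AxB_symbolic has already computed the pattern of C), each column C(:,j) is built Gustavson-style: the dense workspace w is cleared only at the row indices already present in C(:,j), every entry B(k,j) scatters min(A(:,k),bkj) into w, and the column is then gathered back out of w. The following is a minimal standalone sketch of that scatter/gather step, assuming CSC storage; axb_column is an illustrative name, not a library routine:

    #include <stdio.h>
    #include <stdint.h>

    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    // hypothetical helper: compute the values of C(:,j) given its pattern
    static void axb_column
    (
        int64_t j, int32_t *w,
        const int64_t *Ap, const int64_t *Ai, const int32_t *Ax,
        const int64_t *Bp, const int64_t *Bi, const int32_t *Bx,
        const int64_t *Cp, const int64_t *Ci, int32_t *Cx
    )
    {
        // clear w only at the positions that appear in C(:,j)
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = 0 ;
        // scatter: w += "A(:,k) * B(k,j)" for each entry B(k,j)
        for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
        {
            int64_t k = Bi [p] ;
            int32_t bkj = Bx [p] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                w [Ai [pa]] += IMIN (Ax [pa], bkj) ;
            }
        }
        // gather C(:,j) back out of the workspace
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;
    }

    int main (void)
    {
        // 2-by-2 example in CSC form: A = [1 4 ; 2 0], B = [3 0 ; 5 6]
        int64_t Ap [3] = {0, 2, 3}, Ai [3] = {0, 1, 0} ; int32_t Ax [3] = {1, 2, 4} ;
        int64_t Bp [3] = {0, 2, 3}, Bi [3] = {0, 1, 1} ; int32_t Bx [3] = {3, 5, 6} ;
        // pattern of C = A*B, as a symbolic pass would produce it
        int64_t Cp [3] = {0, 2, 3}, Ci [3] = {0, 1, 0} ;
        int32_t Cx [3], w [2] ;
        for (int64_t j = 0 ; j < 2 ; j++)
            axb_column (j, w, Ap, Ai, Ax, Bp, Bi, Bx, Cp, Ci, Cx) ;
        // plus-min product: C = [5 4 ; 2 .] ; prints "5 2 4"
        for (int p = 0 ; p < 3 ; p++) printf ("%d ", Cx [p]) ;
        printf ("\n") ;
        return (0) ;
    }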
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int8.c new file mode 100644 index 0000000000..f7883a2090 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_int8 +// A'*B function: GB_AdotB__plus_min_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
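When a Mask is present and the WITH_ZOMBIES path is compiled in, the kernels above take the pattern of C directly from the Mask: positions of Mask(:,j) that receive at least one contribution from A*B become live entries, and the rest become "zombies", recorded in place by flipping their row index and counted in C->nzombies so a later assembly step can prune them. A small standalone sketch of that encoding follows; the FLIP definition used here (-(i)-2, an involution that maps valid indices to values below -1) is my assumption about the style of the macro in GB.h, not a quote of it:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define FLIP(i)      (-(i)-2)     // assumed encoding; involutive, never -1
    #define IS_ZOMBIE(i) ((i) < 0)

    int main (void)
    {
        // row indices of one column of C, taken from the Mask; two positions
        // received no contribution from A*B and are marked as zombies
        int64_t Ci [5]   = { 0, 3, 4, 7, 9 } ;
        bool    live [5] = { true, false, true, true, false } ;
        int64_t nzombies = 0 ;
        for (int p = 0 ; p < 5 ; p++)
        {
            if (!live [p]) { Ci [p] = FLIP (Ci [p]) ; nzombies++ ; }
        }
        // a later pass prunes the zombies, keeping live entries in order
        int64_t cnz = 0 ;
        for (int p = 0 ; p < 5 ; p++)
        {
            if (!IS_ZOMBIE (Ci [p])) Ci [cnz++] = Ci [p] ;
        }
        printf ("kept %d entries, pruned %d zombies\n", (int) cnz, (int) nzombies) ;
        return (0) ;
    }

This is also why the zombie path ends with GB_queue_insert (C): a matrix carrying pending deletions is placed on the queue of matrices with pending work.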
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
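A side note on the MERGE macro used throughout the dot-product kernels: it initializes cij from the first matching index and only accumulates on later matches, so the scalar never has to start from the additive identity, and cij_exists doubles as the test for whether C(i,j) belongs in the pattern at all. The same idea in plain, standalone C (sparse_dot is an illustrative name, not a library routine):

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define IMIN(x,y) (((x) < (y)) ? (x) : (y))

    // returns true if c(i,j) exists, i.e. the two sorted patterns intersect
    static bool sparse_dot (int64_t an, const int64_t *Ai, const int8_t *Ax,
                            int64_t bn, const int64_t *Bi, const int8_t *Bx,
                            int8_t *cij)
    {
        bool exists = false ;
        int64_t pa = 0, pb = 0 ;
        while (pa < an && pb < bn)
        {
            if      (Ai [pa] < Bi [pb]) pa++ ;
            else if (Bi [pb] < Ai [pa]) pb++ ;
            else
            {
                int8_t t = IMIN (Ax [pa++], Bx [pb++]) ;
                if (exists) (*cij) += t ; else { (*cij) = t ; exists = true ; }
            }
        }
        return (exists) ;
    }

    int main (void)
    {
        int64_t Ai [3] = {1, 4, 7} ; int8_t Ax [3] = {5, 2, 9} ;
        int64_t Bi [3] = {0, 4, 7} ; int8_t Bx [3] = {3, 6, 4} ;
        int8_t cij ;
        if (sparse_dot (3, Ai, Ax, 3, Bi, Bx, &cij))
            printf ("cij = %d\n", cij) ;        // min(2,6) + min(9,4) = 6
        return (0) ;
    }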
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint16.c new file mode 100644 index 0000000000..1cc4a740a0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_uint16 +// A'*B function: GB_AdotB__plus_min_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
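Two small optimizations recur in the masked outer-product loops, both in the kernels above and in the uint16 kernel that follows. First, the row range [alo, ahi] of A(:,k) is compared against the row range [mlo, mhi] of Mask(:,j), both reported by the helper empty, and A(:,k) is skipped outright when the ranges cannot overlap. Second, Mask(:,j) is scattered into the byte-sized Flag array at most once per column, guarded by the marked latch. The sketch below captures both ideas with hypothetical helper names (column_empty, scatter_once); unlike the real scatter_mask it ignores the Mask values, which the kernel also casts to boolean:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    // report the row range of a compressed column; true if the column is empty
    static bool column_empty (const int64_t *Xp, const int64_t *Xi, int64_t j,
                              int64_t *lo, int64_t *hi)
    {
        int64_t p1 = Xp [j], p2 = Xp [j+1] ;
        if (p1 == p2) return (true) ;
        (*lo) = Xi [p1] ;
        (*hi) = Xi [p2-1] ;
        return (false) ;
    }

    // scatter the pattern of Mask(:,j) into Flag, at most once per column
    static void scatter_once (int64_t j, const int64_t *Maskp,
        const int64_t *Maski, int8_t *Flag, bool *marked)
    {
        if (*marked) return ;
        for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) Flag [Maski [p]] = 1 ;
        (*marked) = true ;
    }

    int main (void)
    {
        // Mask(:,0) has rows {2,5}; A(:,0) has rows {7,9}: the ranges miss
        int64_t Maskp [2] = {0, 2}, Maski [2] = {2, 5} ;
        int64_t Ap    [2] = {0, 2}, Ai    [2] = {7, 9} ;
        int64_t mlo, mhi, alo, ahi ;
        int8_t Flag [16] = {0} ;
        bool marked = false ;
        bool skip = column_empty (Maskp, Maski, 0, &mlo, &mhi)
                 || column_empty (Ap, Ai, 0, &alo, &ahi)
                 || ahi < mlo || alo > mhi ;
        if (!skip) scatter_once (0, Maskp, Maski, Flag, &marked) ;
        printf ("skip A(:,0)? %s\n", skip ? "yes" : "no") ;   // prints "yes"
        return (0) ;
    }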
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint32.c new file mode 100644 index 0000000000..945e3c2e0f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_uint32 +// A'*B function: GB_AdotB__plus_min_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
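    // The kernel below uses the classic sparse "saxpy" (outer-product)
    // formulation: column j of C is the semiring sum over k of A(:,k)
    // "scaled" by B(k,j), accumulated into the dense workspace w of length
    // C->nrows.  A minimal standalone sketch of one such accumulation step
    // for the PLUS_MIN semiring is shown here; it is illustrative only
    // (gb_min32 is a stand-in for the library's IMIN macro, and all pattern
    // and mask handling of the real kernel is omitted).
    #if 0
    static inline uint32_t gb_min32 (uint32_t x, uint32_t y)
    {
        return ((x < y) ? x : y) ;      // semiring multiply: t = min (x,y)
    }

    static void plus_min_saxpy          // w += min (A(:,k), B(k,j))
    (
        uint32_t *w,                    // dense workspace, length A->nrows
        const int64_t *Ap,              // column pointers of A
        const int64_t *Ai,              // row indices of A
        const uint32_t *Ax,             // values of A
        int64_t k,                      // column of A to accumulate
        uint32_t bkj                    // the scalar B(k,j)
    )
    {
        for (int64_t p = Ap [k] ; p < Ap [k+1] ; p++)
        {
            // w [i] "+=" A(i,k) "*" B(k,j) in the PLUS_MIN semiring
            w [Ai [p]] += gb_min32 (Ax [p], bkj) ;
        }
    }
    #endif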
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
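    // Entries that appear in the Mask but are not produced by A*B are not
    // deleted here; they are kept as "zombies" whose row index is stored in
    // flipped form (see the FLIP usage above), and the matrix is handed to
    // the queue of matrices with pending work so the deletions can be
    // finished later.  A minimal sketch of one self-inverse flip encoding is
    // shown below, assuming FLIP(i) = -(i)-2; the library's actual macro may
    // differ.
    #if 0
    static inline int64_t flip_index (int64_t i)
    {
        // maps 0,1,2,... to -2,-3,-4,... ; applying it twice returns i
        return (-(i) - 2) ;
    }
    static inline bool index_is_zombie (int64_t i)
    {
        return (i < 0) ;
    }
    #endif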
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint64.c new file mode 100644 index 0000000000..45791a5535 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_uint64 +// A'*B function: GB_AdotB__plus_min_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
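    // The masked branch below drives its scatter/gather through the per-row
    // Flag workspace, using three states: 0 means row i is not in Mask(:,j);
    // a positive value means the row is in the mask but C(i,j) has no value
    // yet; a negative value means w [i] already holds a partial sum for
    // C(i,j).  A minimal sketch of that update step, matching the uint64
    // PLUS_MIN kernel in this file (illustrative only):
    #if 0
    static inline void masked_plus_min_update
    (
        uint64_t *w,        // dense accumulator, one entry per row
        int8_t *Flag,       // 0: not in mask, >0: in mask, <0: sum started
        int64_t i,          // row index of A(i,k)
        uint64_t aik,       // the value A(i,k)
        uint64_t bkj        // the value B(k,j)
    )
    {
        if (Flag [i] == 0) return ;                 // Mask(i,j) is not set
        uint64_t t = (aik < bkj) ? aik : bkj ;      // multiply: min (aik,bkj)
        if (Flag [i] > 0)
        {
            Flag [i] = -1 ;                         // first contribution
            w [i] = t ;
        }
        else
        {
            w [i] += t ;                            // add: plus
        }
    }
    #endif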
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
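    // Whether or not zombies are kept, C is stored in compressed-sparse-
    // column form: the entries of column j occupy positions Cp [j] to
    // Cp [j+1]-1 of Ci and Cx, which is why the non-zombie branch below must
    // finish by setting Cp [n] = cnz to close the last column.  A minimal
    // sketch of walking such a structure (hypothetical helper, assuming
    // <stdio.h> is available):
    #if 0
    static void print_csc_pattern (int64_t n, const int64_t *Cp,
        const int64_t *Ci)
    {
        for (int64_t j = 0 ; j < n ; j++)
        {
            for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++)
            {
                printf ("(%lld,%lld)\n", (long long) Ci [p], (long long) j) ;
            }
        }
    }
    #endif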
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the 
last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint8.c new file mode 100644 index 0000000000..f4b97d56e0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_min_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_min_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_min_uint8 +// A'*B function: GB_AdotB__plus_min_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
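    // The flip argument records that the caller has swapped A and B before
    // invoking this kernel.  MIN is commutative, so the PLUS_MIN kernels in
    // this file can simply ignore it, whereas the PLUS_MINUS kernels further
    // below must restore the operand order when they multiply.  A minimal
    // sketch of that pattern for a non-commutative multiply (illustrative
    // only):
    #if 0
    static inline double apply_minus (double aik, double bkj, bool flip)
    {
        // compute aik-bkj even when the caller handed the operands swapped
        return (flip ? (bkj - aik) : (aik - bkj)) ;
    }
    #endif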
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp32.c new file mode 100644 index 0000000000..949849925e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_fp32 +// A'*B function: GB_AdotB__plus_minus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
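    // As the header comment above notes, every GB_AxB__* file is stamped out
    // from one template: only the scalar type, the multiply operator, and
    // the additive identity change from file to file.  A minimal sketch of
    // that substitution style, using hypothetical macro names (the real
    // Template/GB*AxB.[ch] files and axb*.m scripts define their own):
    #if 0
    #define GB_ZTYPE      float                            /* type of C     */
    #define GB_XYTYPE     float                            /* type of A, B  */
    #define GB_IDENTITY   0                                /* additive id   */
    #define GB_MULT(a,b)  (flip ? ((b)-(a)) : ((a)-(b)))   /* minus, flipped */
    #define GB_ADD(c,t)   ((c) += (t))                     /* plus           */
    //  used inside the template as:
    //      GB_ZTYPE t = GB_MULT (aik, bkj) ;
    //      GB_ADD (cij, t) ;
    #endif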
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp64.c new file mode 100644 index 0000000000..69063c09cf --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_fp64: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_fp64 +// A'*B function: GB_AdotB__plus_minus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int16.c new file mode 100644 index 0000000000..d9735e92d0 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_int16: hard-coded C=A*B 
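+//
+// In conventional notation, each computed entry accumulates
+//
+//      C (i,j) = sum over k of (A (i,k) - B (k,j))
+//
+// where the sum ranges over the k for which both A (i,k) and B (k,j) are
+// present: the PLUS monoid (identity 0) applied to the MINUS multiplier,
+// here in int16_t arithmetic.  The dot-product variant applies the same
+// semiring to A (:,i)'*B (:,j).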
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_int16 +// A'*B function: GB_AdotB__plus_minus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done 
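+                // scatter_mask presumably sets Flag [i] nonzero for each
+                // Mask (i,j) that casts to true (and records that it has run
+                // via "marked").  In the update loop below, Flag [i] == 0
+                // skips rows not permitted by the Mask; Flag [i] > 0 means
+                // this is the first contribution to C (i,j), so w [i] is
+                // assigned and Flag [i] is set to -1; Flag [i] < 0 means
+                // C (i,j) has been seen before, so w [i] is accumulated.
+                // The gather phase then resets Flag [i] back to zero.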
+ scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int32.c new file mode 100644 index 0000000000..330c930a1b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_int32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_int32 +// A'*B function: GB_AdotB__plus_minus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done 
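+                // For illustration (hypothetical indices k1, k2): if
+                // Mask (i,j) is true and B (:,j) holds B (k1,j) and B (k2,j)
+                // matching entries A (i,k1) and A (i,k2), then after the loop
+                // over B (:,j) the scattered value is
+                //      w [i] = (A (i,k1)-B (k1,j)) + (A (i,k2)-B (k2,j)),
+                // with each difference reversed when flip is true.  The
+                // gather phase below copies w [i] into Cx.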
+ scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int64.c new file mode 100644 index 0000000000..3565ae6fb2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_int64: hard-coded C=A*B 
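+//
+// When compiled with WITH_ZOMBIES, the masked outer-product method keeps the
+// pattern of the Mask in C: entries of the Mask that receive no contribution
+// from A*B become zombies (their row index stored as FLIP (i) and counted in
+// C->nzombies), and C is placed in the queue via GB_queue_insert if any
+// zombies were created.  Without WITH_ZOMBIES, only live entries are
+// gathered and Cp is rebuilt one column at a time.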
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_int64 +// A'*B function: GB_AdotB__plus_minus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done 
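+                // Note on flip: the caller may have swapped A and B before
+                // calling this kernel (flip is true in that case), so the
+                // MINUS operands are reversed below, bkj-aik instead of
+                // aik-bkj, which in effect preserves the result for the
+                // original, unswapped operands.  The reversal matters because
+                // MINUS is not commutative.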
+ scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int8.c new file mode 100644 index 0000000000..d76d25e404 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_int8: hard-coded C=A*B 
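+//
+// The dot-product variant (GB_AdotB__plus_minus_int8) computes each C (i,j)
+// as A (:,i)'*B (:,j), choosing a kernel per pair of columns: both dense,
+// one dense and one sparse, one column more than 32 times sparser than the
+// other (skipping ahead with GB_BINARY_TRIM_SEARCH), or a two-pointer merge
+// when the two columns have comparable sparsity.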
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_int8 +// A'*B function: GB_AdotB__plus_minus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint16.c new file mode 100644 index 0000000000..0a3d82fe57 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_uint16: hard-coded C=A*B 
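+//
+// Note: in this uint16_t instantiation the stored results of the MINUS
+// multiplier and the PLUS accumulation wrap modulo 2^16, so aik-bkj with
+// bkj > aik wraps around rather than going negative.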
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_uint16 +// A'*B function: GB_AdotB__plus_minus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not 
yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint32.c new file mode 100644 index 0000000000..4a7de945fa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_uint32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_uint32 +// A'*B function: GB_AdotB__plus_minus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not 
yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint64.c new file mode 100644 index 0000000000..ade729133b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_uint64: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_uint64 +// A'*B function: GB_AdotB__plus_minus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not 
yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint8.c new file mode 100644 index 0000000000..1072f6dae3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_minus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_minus_uint8: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_minus_uint8 +// A'*B function: GB_AdotB__plus_minus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done 
+ scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp32.c new file mode 100644 index 0000000000..1a55d17e98 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_fp32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_fp32 +// A'*B function: GB_AdotB__plus_plus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, 
Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize 
= 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } 
+ else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp64.c new file mode 100644 index 0000000000..7ebc197e06 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_fp64 +// A'*B function: GB_AdotB__plus_plus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
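+ // Background sketch (illustration only, assuming a dense A and B; not part
+ // of the generated kernel): with the PLUS_PLUS semiring, "multiply" is
+ // ordinary addition and "add" is +=, so each entry of C is the sum of
+ // (A(i,k) + B(k,j)) over the k for which both A(i,k) and B(k,j) are
+ // present.  A dense reference computation would be:
+ //
+ //      for (int64_t j = 0 ; j < C->ncols ; j++)
+ //      {
+ //          for (int64_t i = 0 ; i < C->nrows ; i++)
+ //          {
+ //              double cij = 0 ;                    // identity of PLUS
+ //              for (int64_t k = 0 ; k < A->ncols ; k++)
+ //              {
+ //                  cij += (A (i,k) + B (k,j)) ;    // multiply, then add
+ //              }
+ //              C (i,j) = cij ;
+ //          }
+ //      }
+ //
+ // The kernel below produces the same result but exploits sparsity,
+ // accumulating each column C(:,j) into the workspace w and, when a Mask
+ // is present, restricting the work to the pattern of the Mask.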
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int16.c new file mode 100644 index 0000000000..f6f74c0ab1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_int16 +// A'*B function: GB_AdotB__plus_plus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
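+ // For orientation (a summary of the code below, not new behavior): when a
+ // Mask is passed in, C takes the pattern of the Mask.  In the WITH_ZOMBIES
+ // variant, an entry that is in the Mask but not in A*B is kept as a
+ // "zombie": its row index is stored as FLIP (i), C->nzombies is
+ // incremented, and C is placed on the queue with GB_queue_insert so the
+ // zombies can be pruned later.  In the non-zombie variant the kernel
+ // instead compacts each C(:,j) as it goes and finishes with Cp [n] = cnz.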
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int32.c new file mode 100644 index 0000000000..8c57a1f8b1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_int32 +// A'*B function: GB_AdotB__plus_plus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
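+ // For orientation (a summary of the code below): w and Flag are
+ // preallocated thread-local workspaces.  w holds the numerical values of
+ // the column C(:,j) being assembled, one slot per row of C.  Flag encodes
+ // the state of each row for the current Mask column: 0 means not in the
+ // Mask, a positive value means in the Mask but not yet written, and -1
+ // means C(i,j) has already received its first value and is now being
+ // accumulated.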
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int64.c new file mode 100644 index 0000000000..d853d20785 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_int64 +// A'*B function: GB_AdotB__plus_plus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
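+ // For orientation (a summary of the code below): the companion
+ // GB_AdotB__plus_plus_int64 kernel computes C=A'*B one dot product at a
+ // time.  For each C(i,j) it selects a strategy from the entry counts of
+ // A(:,i) (ainz) and B(:,j) (bjnz): both dense, one dense and one sparse,
+ // or a sorted merge of the two patterns.  When one column has more than
+ // 32 times the entries of the other, the merge advances through the
+ // denser column with GB_BINARY_TRIM_SEARCH rather than one entry at a
+ // time.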
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int8.c new file mode 100644 index 0000000000..3bbd5a68e7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_int8 +// A'*B function: GB_AdotB__plus_plus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
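+ // For orientation: the flip argument records that the caller swapped A and
+ // B before dispatching here.  Since the PLUS_PLUS multiply aik + bkj is
+ // commutative, swapping the operands does not change the result, and flip
+ // is not consulted in these plus_plus kernels.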
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint16.c new file mode 100644 index 0000000000..a8f0dc3946 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_uint16 +// A'*B function: GB_AdotB__plus_plus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
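
The unmasked branch of each generated GB_AxB kernel follows the scatter/gather ("Gustavson") pattern sketched below for one column C(:,j): for every entry B(k,j), column A(:,k) is scattered into a row-indexed workspace w, and C(:,j) is gathered afterwards. This is a simplified sketch with hypothetical CSC arrays; it uses a fully dense workspace and omits the symbolic pattern Cp/Ci that the real kernel gathers from.

// Illustrative sketch only: scatter/gather accumulation of one column
// C(:,j) = A*B(:,j) under the PLUS_PLUS semiring.  Toy CSC data.
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // A is 3-by-2 in CSC form: A(0,0)=1, A(2,0)=2, A(1,1)=3
    int64_t  Ap [3] = { 0, 2, 3 } ;
    int64_t  Ai [3] = { 0, 2, 1 } ;
    uint16_t Ax [3] = { 1, 2, 3 } ;

    // B(:,j) has entries B(0,j)=10 and B(1,j)=20
    int64_t  Bi [2] = { 0, 1 } ;
    uint16_t Bx [2] = { 10, 20 } ;

    uint16_t w [3] = { 0, 0, 0 } ;          // workspace, one slot per row of C

    for (int64_t p = 0 ; p < 2 ; p++)       // for each entry B(k,j)
    {
        int64_t  k   = Bi [p] ;
        uint16_t bkj = Bx [p] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            // w [i] += "A(i,k) * B(k,j)", which is aik + bkj for PLUS_PLUS
            w [Ai [pa]] += (uint16_t) (Ax [pa] + bkj) ;
        }
    }

    for (int64_t i = 0 ; i < 3 ; i++)       // gather the dense workspace
    {
        printf ("C(%d,j) = %u\n", (int) i, (unsigned) w [i]) ;
    }
    return (0) ;
}
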
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint32.c new file mode 100644 index 0000000000..4283994b4b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_uint32 +// A'*B function: GB_AdotB__plus_plus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
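
The GB_AdotB kernels compute each dot product C(i,j) = A(:,i)'*B(:,j) by merging two sorted row-index lists. The sketch below, with hypothetical index and value arrays, reproduces the "about the same sparsity" branch and the effect of the MERGE macro: advance whichever pointer has the smaller row index, and multiply-accumulate when the indices match.

// Illustrative sketch only: two-pointer merge for one dot product under the
// PLUS_PLUS_UINT32 semiring.  Toy data.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // sorted row indices and values of A(:,i) and B(:,j)
    int64_t  Ai [4] = { 0, 3, 5, 9 } ;  uint32_t Ax [4] = { 1, 2, 3, 4 } ;
    int64_t  Bi [3] = { 3, 5, 7 } ;     uint32_t Bx [3] = { 10, 20, 30 } ;

    int64_t pa = 0, pa_end = 4, pb = 0, pb_end = 3 ;
    bool cij_exists = false ;
    uint32_t cij = 0 ;

    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if      (ia < ib) pa++ ;            // A(ia,i) has no matching B entry
        else if (ib < ia) pb++ ;            // B(ib,j) has no matching A entry
        else
        {
            // same row k: "multiply" and accumulate, as in the MERGE macro
            uint32_t t = Ax [pa++] + Bx [pb++] ;
            if (cij_exists) cij += t ; else { cij_exists = true ; cij = t ; }
        }
    }

    if (cij_exists) printf ("C(i,j) = %u\n", (unsigned) cij) ;  // (2+10)+(3+20) = 35
    else            printf ("C(i,j) not present\n") ;
    return (0) ;
}
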
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint64.c new file mode 100644 index 0000000000..2d3ca76387 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_uint64 +// A'*B function: GB_AdotB__plus_plus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
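
When one of the two vectors has many more entries (the ainz > 32*bjnz and bjnz > 32*ainz branches), the kernel jumps ahead in the longer index list instead of advancing one entry at a time. The sketch below conveys the idea with an ordinary lower-bound binary search on hypothetical data; it is not the GB_BINARY_TRIM_SEARCH macro itself, and it drops the cij_exists bookkeeping for brevity.

// Illustrative sketch only: skipping ahead in the denser vector via binary
// search, for one dot product under the PLUS_PLUS_UINT64 semiring.
#include <stdint.h>
#include <stdio.h>

// first position p in Ai [lo..hi-1] with Ai [p] >= target (lower bound)
static int64_t lower_bound (const int64_t *Ai, int64_t lo, int64_t hi,
    int64_t target)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (Ai [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return (lo) ;
}

int main (void)
{
    // A(:,i) is long and B(:,j) is very sparse
    int64_t  Ai [10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } ;
    uint64_t Ax [10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } ;
    int64_t  Bi [2]  = { 4, 9 } ;
    uint64_t Bx [2]  = { 100, 200 } ;

    uint64_t cij = 0 ;
    int64_t  pa = 0 ;
    for (int64_t pb = 0 ; pb < 2 ; pb++)
    {
        pa = lower_bound (Ai, pa, 10, Bi [pb]) ;    // discard A entries < Bi[pb]
        if (pa < 10 && Ai [pa] == Bi [pb])
        {
            cij += Ax [pa++] + Bx [pb] ;    // PLUS_PLUS "multiply", then add
        }
    }
    printf ("C(i,j) = %llu\n", (unsigned long long) cij) ;  // (4+100)+(9+200) = 313
    return (0) ;
}
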
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, 
+ &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = 
cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint8.c new file mode 100644 index 0000000000..c955afe82a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_plus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_plus_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_plus_uint8 +// A'*B function: GB_AdotB__plus_plus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
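
The masked outer-product branch relies on a three-state Flag array: 0 means Mask(i,j) is false, a positive value means the mask allows C(i,j) but nothing has been accumulated yet, and -1 means w[i] already holds a partial result. Entries still positive at gather time are the ones the real kernel turns into zombies (row index stored as FLIP(i)); the hypothetical sketch below only reports which entries would be live and which would be zombies.

// Illustrative sketch only: the three-state Flag protocol of the masked
// kernel, with made-up per-row contributions.
#include <stdint.h>
#include <stdio.h>

int main (void)
{
    int8_t  Flag [4] = { 1, 0, 1, 1 } ;     // Mask(:,j): rows 0, 2, 3 allowed
    uint8_t w    [4] = { 0, 0, 0, 0 } ;

    // hypothetical products t contributed to rows 0 and 2 (row 3 gets none)
    struct { int64_t i ; uint8_t t ; } updates [3] =
        { { 0, 5 }, { 2, 7 }, { 0, 3 } } ;

    for (int k = 0 ; k < 3 ; k++)
    {
        int64_t i = updates [k].i ;
        if (Flag [i] == 0) continue ;       // not in the mask: skip the work
        if (Flag [i] > 0)                   // first time C(i,j) is seen
        {
            Flag [i] = -1 ;
            w [i] = updates [k].t ;
        }
        else                                // C(i,j) seen before: update it
        {
            w [i] += updates [k].t ;
        }
    }

    for (int64_t i = 0 ; i < 4 ; i++)       // gather from the mask pattern
    {
        if      (Flag [i] < 0) printf ("C(%d,j) = %u (live)\n", (int) i, (unsigned) w [i]) ;
        else if (Flag [i] > 0) printf ("C(%d,j) would be a zombie\n", (int) i) ;
    }
    return (0) ;
}
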
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_second_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_fp32.c new file mode 100644 index 0000000000..5ccdd5a5ba --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_fp32 +// A'*B function: GB_AdotB__plus_second_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
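+    // Note on the PLUS_SECOND semiring used here: the "multiply" operator is
+    // SECOND, so t = bkj and the numerical value of A(i,k) is never used
+    // (only its pattern); the "add" operator is PLUS, so C(i,j) accumulates
+    // the sum of B(k,j) over all k for which A(i,k) and B(k,j) both exist.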
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it 
has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) 
both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_second_fp64.c 
b/GraphBLAS/Source/Generated/GB_AxB__plus_second_fp64.c new file mode 100644 index 0000000000..bf6e266f3d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_fp64 +// A'*B function: GB_AdotB__plus_second_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
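+    // Note on the masked phase below: when WITH_ZOMBIES is defined, C->p is
+    // copied from Maskp so that C takes the pattern of the Mask; any Mask
+    // entry not computed by A*B is kept as a zombie (its row index stored as
+    // FLIP (i), with C->nzombies incremented), and C is placed on the queue
+    // via GB_queue_insert so the pending zombies can be dealt with later.
+    // Without WITH_ZOMBIES, only live entries are gathered and Cp is built
+    // incrementally from cnz.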
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_second_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int16.c new file mode 100644 index 0000000000..21a0cdf59d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_int16 +// A'*B function: GB_AdotB__plus_second_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
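+    // Note: w is the thread-local Work array, used as a dense accumulator
+    // with one entry per row of C.  For each column j it is either cleared
+    // over the pattern of C(:,j) or written on first use via the Flag array,
+    // updated by the saxpy loop over B(:,j), and then gathered into C(:,j).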
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_second_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int32.c new file mode 100644 index 0000000000..d914126d0e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_int32 +// A'*B function: GB_AdotB__plus_second_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
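+    // Note: this function handles two cases.  If Mask is non-NULL, the Mask
+    // provides the pattern of C and the result is computed directly into
+    // that pattern (with zombies, when enabled).  If Mask is NULL, the
+    // pattern of C has already been computed by GB_AxB_symbolic and only
+    // the numerical values are computed here.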
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_second_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int64.c new file mode 100644 index 0000000000..9605d0dbb7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_int64 +// A'*B function: GB_AdotB__plus_second_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
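+    // Note: the companion dot-product kernel GB_AdotB__plus_second_int64,
+    // defined later in this file, computes C=A'*B one entry at a time; for
+    // each C(i,j) it picks a strategy based on the sparsity of A(:,i) and
+    // B(:,j) (both dense, one dense, or a sorted merge, with a binary trim
+    // search when one column has more than 32 times the entries of the other).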
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_second_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int8.c new file mode 100644 index 0000000000..8a3b7591f4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_int8 +// A'*B function: GB_AdotB__plus_second_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
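    // Outer-product (Gustavson-style) numeric phase.  In outline, for each
    // column j the entries B(k,j) select columns A(:,k), and their
    // contributions are accumulated into the dense workspace w, one slot per
    // row of C.  Roughly:
    //
    //      for each column j of B:
    //          for each entry B(k,j):
    //              for each entry A(i,k):
    //                  w [i] += A(i,k) * B(k,j)   // with SECOND: w [i] += B(k,j)
    //          gather w into C(:,j)
    //
    // When a Mask is present, the Flag workspace marks which rows of
    // Mask(:,j) may receive a value, so rows outside the mask pattern are
    // skipped entirely.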
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint16.c new file mode 100644 index 0000000000..f4748abc7e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_uint16 +// A'*B function: GB_AdotB__plus_second_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
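    // Masked path: under WITH_ZOMBIES, C->p is copied verbatim from Maskp,
    // so C takes on exactly the pattern of the Mask.  Mask entries that end
    // up with no contribution from A*B are kept as zombies: the row index is
    // stored as FLIP (i) and C->nzombies is incremented, and the matrix is
    // placed in the queue (GB_queue_insert) so the zombies can be pruned
    // later when the matrix is next assembled.  Without WITH_ZOMBIES, live
    // entries are compacted on the fly and Cp [j] records the running count
    // cnz instead.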
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
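// Note on the dot-product kernel above (GB_AdotB__plus_second_uint16): each
// C(i,j) is formed by merging the patterns of A(:,i) and B(:,j).  The cases
// are: both vectors dense (direct loop over all rows); one dense and one
// sparse (the sparse one drives the loop and indexes the dense one); and both
// sparse.  In the sparse/sparse case, if one vector has more than 32 times
// the entries of the other, GB_BINARY_TRIM_SEARCH leaps over runs of the
// denser vector instead of stepping one entry at a time; otherwise a
// conventional two-finger merge is used.  The MERGE macro applies the
// semiring, cij += B(k,j), at every index k common to both patterns.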
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint32.c new file mode 100644 index 0000000000..f6cc31ccac --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_uint32 +// A'*B function: GB_AdotB__plus_second_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
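    // Semiring note: the multiplier here is SECOND, so t = B(k,j) and the
    // value A(i,k) does not influence the product (the aik temporaries below
    // are read but unused); the add is the PLUS monoid with identity 0.  In
    // effect each C(i,j) is the sum of B(k,j) over all k for which both
    // A(i,k) and B(k,j) are present.  For example, if the two patterns
    // overlap at k = 2 and k = 7, then C(i,j) = B(2,j) + B(7,j).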
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint64.c new file mode 100644 index 0000000000..c452e34a26 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_uint64 +// A'*B function: GB_AdotB__plus_second_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
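    // When no Mask is passed, the pattern of C (Cp and Ci) has already been
    // computed by GB_AxB_symbolic, so the unmasked branch below is purely
    // numeric: for each column j it clears w over the rows listed in Ci,
    // accumulates the products, and gathers w back into Cx without touching
    // the pattern.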
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint8.c new file mode 100644 index 0000000000..a371cbbb71 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_second_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_second_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_second_uint8 +// A'*B function: GB_AdotB__plus_second_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
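+ // (w is essentially the Gustavson-style scatter/gather workspace: each
+ // column C(:,j) is accumulated into w at the row positions produced by
+ // A*B(:,j), then gathered back out using the pattern of C(:,j) or of the
+ // Mask, so w is cleared, or guarded by the Flag array, only at the
+ // positions listed in that column's pattern, never over all of C->nrows.)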
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_times_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_fp32.c new file mode 100644 index 0000000000..6ff0c77c89 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_fp32 +// A'*B function: GB_AdotB__plus_times_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
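+ // Illustration of the PLUS_TIMES update performed below: if column j of B
+ // holds B(k1,j) = 3 and B(k2,j) = 0.5, and row i of A holds A(i,k1) = 2
+ // and A(i,k2) = 4, then w [i] accumulates 2*3 + 4*0.5 = 7, which is the
+ // value gathered into C(i,j).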
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if 
it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_times_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_fp64.c new file mode 100644 index 0000000000..2286398923 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_fp64 +// A'*B function: GB_AdotB__plus_times_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
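+ // In the masked branch below, the thread-local Flag array encodes the
+ // state of each row i for the current column j: Flag [i] == 0 means
+ // Mask(i,j) is not set and the entry is skipped, Flag [i] > 0 means
+ // Mask(i,j) is set but C(i,j) has not been computed yet, and Flag [i] is
+ // set to -1 once w [i] holds a partial sum for C(i,j).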
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_times_int16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int16.c new file mode 100644 index 0000000000..bd07228296 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_int16 +// A'*B function: GB_AdotB__plus_times_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
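+ // When WITH_ZOMBIES is enabled, C adopts the pattern of the Mask verbatim
+ // (Maskp is copied into C->p), and any Mask entry that does not appear in
+ // A*B is kept as a "zombie": its row index is stored as FLIP (i), its
+ // value is set to zero, and C->nzombies is incremented so the entry can
+ // be deleted later when the pending work on C is finished.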
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_times_int32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int32.c new file mode 100644 index 0000000000..5a0cc98616 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_int32 +// A'*B function: GB_AdotB__plus_times_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
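+ // The dot-product kernel (GB_AdotB__plus_times_int32, below) merges the
+ // patterns of A(:,i) and B(:,j) with a two-finger linear merge when the
+ // two columns have comparable numbers of entries; if one column has more
+ // than 32 times the entries of the other, it instead advances through the
+ // denser column with GB_BINARY_TRIM_SEARCH, skipping runs of indices that
+ // cannot match.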
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_times_int64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int64.c new file mode 100644 index 0000000000..1951c7e3c1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_int64 +// A'*B function: GB_AdotB__plus_times_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
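//------------------------------------------------------------------------------
// Illustrative note (not part of the generated file): the outer-product phase
// below follows Gustavson's method.  For each column j, C(:,j) is accumulated
// in the dense workspace w, one scaled column of A at a time, and then
// gathered back into sparse form.  A minimal sketch of that inner step, with
// hypothetical names, under the PLUS_TIMES semiring:
//
//     // w [i] += A(i,k) * bkj for every entry A(i,k) in column k of A
//     static inline void saxpy_column (int64_t *w, const int64_t *Ax,
//         const int64_t *Ai, int64_t pa_start, int64_t pa_end, int64_t bkj)
//     {
//         for (int64_t pa = pa_start ; pa < pa_end ; pa++)
//         {
//             w [Ai [pa]] += Ax [pa] * bkj ;   // "times", then "plus"
//         }
//     }
//------------------------------------------------------------------------------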
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_times_int8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int8.c new file mode 100644 index 0000000000..d74b4edfed --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_int8 +// A'*B function: GB_AdotB__plus_times_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
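//------------------------------------------------------------------------------
// Illustrative note (not part of the generated file): in the masked phase
// below, the int8_t Flag workspace encodes three states per row i of the
// current column j:
//     Flag [i] ==  0 : Mask(i,j) is not present (or is false); skip the work
//     Flag [i] ==  1 : Mask(i,j) is present but C(i,j) has not been seen yet
//     Flag [i] == -1 : C(i,j) exists; its running value is held in w [i]
// When WITH_ZOMBIES is defined, mask entries that never receive a value stay
// in the pattern as "zombies" (row index FLIP(i)), and the matrix is placed in
// the queue via GB_queue_insert so the zombies can be deleted later.
//------------------------------------------------------------------------------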
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint16.c new file mode 100644 index 0000000000..4c23ca6ad6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_uint16 +// A'*B function: GB_AdotB__plus_times_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
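//------------------------------------------------------------------------------
// Illustrative note (not part of the generated file): the GB_AdotB__* function
// later in this file computes C=A'*B one dot product at a time,
// cij = A(:,i)'*B(:,j), choosing a strategy per pair of columns: dense*dense,
// dense*sparse, sparse*dense, or a two-pointer merge of the two sorted index
// lists.  When one column has more than 32 times the entries of the other, the
// merge advances through the denser column with GB_BINARY_TRIM_SEARCH (a
// binary search) instead of one entry at a time.  A sketch of the plain merge,
// with hypothetical names:
//
//     uint16_t sum = 0 ;
//     while (pa < pa_end && pb < pb_end)
//     {
//         if      (Ai [pa] < Bi [pb]) pa++ ;
//         else if (Bi [pb] < Ai [pa]) pb++ ;
//         else sum += Ax [pa++] * Bx [pb++] ;   // indices match: multiply-add
//     }
//------------------------------------------------------------------------------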
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint32.c new file mode 100644 index 0000000000..bfa99d2b5e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_uint32 +// A'*B function: GB_AdotB__plus_times_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
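//------------------------------------------------------------------------------
// Illustrative note (not part of the generated file): the MERGE macro defined
// in the dot-product function below creates C(i,j) lazily.  cij_exists starts
// false; the first matching pair A(k,i)*B(k,j) sets cij, and later pairs
// accumulate into it with the monoid addition.  If no row index of A(:,i)
// matches one of B(:,j), cij_exists stays false and no entry is appended, so
// the pattern of C contains no explicit zeros from empty dot products.
//------------------------------------------------------------------------------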
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint64.c new file mode 100644 index 0000000000..4351ad3767 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__plus_times_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__plus_times_uint64 +// A'*B function: GB_AdotB__plus_times_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 0 (where cij += 0 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij += t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__plus_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
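//------------------------------------------------------------------------------
// Illustrative note (not part of the generated file): each
// GB_AxB__plus_times_<type> file in this directory is the same
// Template/GB*AxB code instantiated by the axb*.m scripts; only the scalar
// type and the multiply/add operators differ between them.  At the user level
// this kernel is reached through GrB_mxm; a hedged usage sketch (the exact
// built-in semiring name is an assumption):
//
//     // C<Mask> = A*B over the (plus,times) semiring on uint64 values
//     GrB_mxm (C, Mask, NULL, GxB_PLUS_TIMES_UINT64, A, B, NULL) ;
//------------------------------------------------------------------------------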
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
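+    // (Cp [n] records the total number of entries in C, so nnz (C) == cnz.)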
    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint8.c
new file mode 100644
index 0000000000..4543d6d449
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__plus_times_uint8.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__plus_times_uint8: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__plus_times_uint8
+// A'*B function: GB_AdotB__plus_times_uint8
+// Z type : uint8_t (the type of C)
+// XY type: uint8_t (the type of A and B)
+// Identity: 0 (where cij += 0 does not change cij)
+// Multiply: t = (aik * bkj)
+// Add: cij += t
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__plus_times_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip       // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
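+    // (All arithmetic below is done in uint8_t, so the plus-times
+    // accumulation wraps modulo 256 whenever a product or running sum
+    // exceeds 255.)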
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] += t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 0 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 0 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + w [i] += t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__plus_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij += t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij += t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 0 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij += t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__semirings.h b/GraphBLAS/Source/Generated/GB_AxB__semirings.h new file mode 100644 index 0000000000..a0bc6ee581 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__semirings.h @@ -0,0 +1,17289 @@ +//------------------------------------------------------------------------------ +// GB_AxB__semirings.h: definitions for GB_AxB__*.c +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// This file has been automatically generated from Template/GB_AxB.h + +void GB_AxB__min_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix 
A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_uint16 +( + GrB_Matrix C, + 
const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix 
B, + bool flip +) ; + +void GB_AxB__times_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_first_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_int32 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; 
+ +void GB_AdotB__max_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_int8 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + 
const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_second_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix 
B, + bool flip +) ; + +void GB_AxB__max_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + 
bool flip +) ; + +void GB_AxB__plus_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix 
A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const 
GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const 
GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + 
const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_plus_fp64 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_int16 +( + GrB_Matrix 
C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + 
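+//------------------------------------------------------------------------------
+// Naming scheme for the generated workers declared in this header (a summary;
+// see GB_AxB_builtin.c and GB_Matrix_AdotB.c for the definitive dispatch):
+// GB_AxB__<add>_<mult>_<type> computes C<Mask> = A*B, and
+// GB_AdotB__<add>_<mult>_<type> computes C<Mask> = A'*B with dot products,
+// for the built-in semiring whose additive monoid is <add>, whose multiply
+// operator is <mult>, and whose operand type is <type>.  All workers share the
+// same signature; when flip is true the operands of the multiply operator are
+// swapped, so z = fmult(bkj,aik) is computed instead of z = fmult(aik,bkj).
+// For example, GrB_mxm with the built-in PLUS_TIMES semiring over FP64 is
+// expected to reach GB_AxB__plus_times_fp64 or GB_AdotB__plus_times_fp64,
+// depending on how GB_Matrix_multiply.c chooses to form the product.
+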
+void GB_AdotB__times_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix 
A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_uint8 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool 
flip +) ; + +void GB_AxB__times_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_int64 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AdotB__max_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const 
GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_uint32 +( + 
GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_fp32 +( + GrB_Matrix C, + 
const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_uint8 +( + GrB_Matrix C, 
+ const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AxB__min_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) 
; + +void GB_AxB__max_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const 
GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_int16 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_uint32 +( + GrB_Matrix 
C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AdotB__plus_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + 
const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const 
GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_uint64 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AxB__min_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + 
+void GB_AxB__max_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const 
GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_fp32 +( + GrB_Matrix C, + 
const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_uint8 +( + 
GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AdotB__plus_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const 
GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix 
Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_uint16 +( + GrB_Matrix C, + 
const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + 
+void GB_AxB__times_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AxB__lor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_bool +( + 
GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix 
Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_eq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool 
flip +) ; + +void GB_AdotB__lor_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AdotB__land_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_int8 +( 
+ GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + 
const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip 
+) ; + +void GB_AxB__land_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AxB__lxor_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix 
Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_gt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix 
B, + bool flip +) ; + +void GB_AdotB__land_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + 
+void GB_AdotB__lxor_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_int32 +( + 
GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const 
GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const 
GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) 
; + +void GB_AxB__eq_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_ge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_int16 +( + GrB_Matrix C, + const GrB_Matrix 
Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + 
const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix 
B, + bool flip +) ; + +void GB_AdotB__lxor_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_fp32 +( + 
GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_le_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_uint8 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_int32 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_uint64 
+( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_int64 +( + 
GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_land_fp64 
+( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) 
; + +void GB_AdotB__times_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lor_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_land_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + 
const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__min_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__min_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const 
GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__max_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__max_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_uint64 +( + GrB_Matrix C, + const 
GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__plus_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__plus_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__times_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__times_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void 
GB_AxB__lor_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lor_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__land_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__land_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__lxor_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__lxor_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AxB__eq_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB__eq_lxor_bool +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_fp32.c new file mode 100644 index 0000000000..6dd296c5d5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_fp32 +// A'*B function: GB_AdotB__times_div_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
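+    // Illustrative note on this semiring (example values chosen here for
+    // exposition only): the multiply operator is division, t = aik/bkj
+    // (or bkj/aik when flip is true), and the add operator is the TIMES
+    // monoid with identity 1, so partial products for the same C(i,j) are
+    // combined as w [i] *= t.  For instance, if column j of B holds
+    // B(1,j)=2 and B(3,j)=4, and row 0 of A holds A(0,1)=6 and A(0,3)=8,
+    // then C(0,j) = (6/2) * (8/4) = 6.  The workspace w keeps one such
+    // running product per row of C while column j is being assembled.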
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj/aki) : (aki/bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_fp64.c new file mode 100644 index 0000000000..ca8902fd84 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_fp64: hard-coded C=A*B 
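+// Like the fp32 kernel above, the dot-product variant below (GB_AdotB)
+// forms C(i,j) = A(:,i)'*B(:,j) by merging the index lists of A(:,i) and
+// B(:,j): dense vectors are indexed directly, a vector more than 32x
+// sparser than the other is advanced with a trimmed binary search, and
+// otherwise a plain two-pointer merge is used; every matching index k
+// contributes t = aki/bkj (or bkj/aki when flip is true) via cij *= t.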
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_fp64 +// A'*B function: GB_AdotB__times_div_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj/aik) : (aik/bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
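+                // Flag encodes the state of each row i for this column j:
+                //   Flag [i] ==  0: the Mask does not permit C(i,j); skip it
+                //   Flag [i]  >  0: the Mask permits C(i,j), but nothing has
+                //                   been accumulated in w [i] yet
+                //   Flag [i] == -1: w [i] already holds the running product
+                //                   for C(i,j) and is updated in place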
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj/aik) : (aik/bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj/aik) : (aik/bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj/aki) : (aki/bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj/aki) : (aki/bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj/aki) : (aki/bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_int16.c new file mode 100644 index 0000000000..caa7bfaf5c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_int16: hard-coded C=A*B 
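// The masked C=A*B path in these kernels scatters the pattern of Mask(:,j)
// into the Flag workspace, accumulates products into a dense work vector w,
// and finally gathers only the positions whose flag went negative.  A
// compact standalone sketch of that scatter/accumulate/gather cycle for one
// column follows; it covers the non-flip, non-zombie case, assumes all mask
// values are true, and every name in it is hypothetical.

#include <stdint.h>

// Flag [i] ==  0 : Mask(i,j) not present
// Flag [i] == +1 : Mask(i,j) present, C(i,j) not yet seen
// Flag [i] == -1 : Mask(i,j) present and w [i] holds a value for C(i,j)
void masked_column_sketch
(
    const int64_t *Ap, const int64_t *Ai, const double *Ax,   // A in CSC form
    const int64_t *Bp, const int64_t *Bi, const double *Bx,   // B in CSC form
    const int64_t *Maskp, const int64_t *Maski,               // mask pattern
    int64_t j, int8_t *Flag, double *w,                       // workspaces
    int64_t *Ci, double *Cx, int64_t *cnz                     // output column
)
{
    // scatter the mask pattern of column j
    for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) Flag [Maski [p]] = 1 ;

    // saxpy-style accumulation: for each B(k,j), scan A(:,k)
    for (int64_t pb = Bp [j] ; pb < Bp [j+1] ; pb++)
    {
        int64_t k = Bi [pb] ;
        double bkj = Bx [pb] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;
            if (Flag [i] == 0) continue ;                  // outside the mask
            double t = Ax [pa] / bkj ;                     // multiply, flip false
            if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = t ; }  // first product
            else              { w [i] *= t ; }             // TIMES monoid update
        }
    }

    // gather live entries in mask order and reset the flags
    for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++)
    {
        int64_t i = Maski [p] ;
        if (Flag [i] < 0)
        {
            Ci [*cnz] = i ;
            Cx [*cnz] = w [i] ;
            (*cnz)++ ;
        }
        Flag [i] = 0 ;
    }
}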
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_int16 +// A'*B function: GB_AdotB__times_div_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_int32.c new file mode 100644 index 0000000000..bf5dc9b42d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_int32: hard-coded C=A*B 
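// In the WITH_ZOMBIES variant of the masked kernels, C takes the mask's
// pattern verbatim, and a mask position that receives no product is kept as
// a "zombie": its row index is stored in flipped (negative) form and
// C->nzombies is incremented so a later phase can prune it.  The real FLIP
// macro is defined in GB.h and is not part of this patch; the sketch below
// only assumes some self-inverse negative encoding, -(i)-2 being one choice.

#include <stdint.h>
#include <assert.h>

#define FLIP_SKETCH(i) (-(i)-2)          // hypothetical stand-in for FLIP

int main (void)
{
    int64_t i = 7 ;
    int64_t z = FLIP_SKETCH (i) ;        // encode: C(i,j) becomes a zombie
    assert (z < 0) ;                     // zombies are recognizable by sign
    assert (FLIP_SKETCH (z) == i) ;      // the encoding is its own inverse
    return 0 ;
}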
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_int32 +// A'*B function: GB_AdotB__times_div_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_int64.c new file mode 100644 index 0000000000..b479546049 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_int64: hard-coded C=A*B 
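// When one of the two vectors in the dot product is more than about 32 times
// denser than the other, the kernels above do not step through it entry by
// entry; they jump ahead with GB_BINARY_TRIM_SEARCH (declared in
// GB_AxB_methods.h, not shown in this patch).  The standalone function below
// is only assumed to have the same effect: narrow [pleft..pright] on a sorted
// index list until pleft is the first position whose index is >= the target.

#include <stdio.h>
#include <inttypes.h>

static int64_t trim_search (int64_t target, const int64_t *X,
                            int64_t pleft, int64_t pright)
{
    while (pleft < pright)
    {
        int64_t pmiddle = (pleft + pright) / 2 ;
        if (X [pmiddle] < target) pleft = pmiddle + 1 ;
        else                      pright = pmiddle ;
    }
    return pleft ;
}

int main (void)
{
    // A(:,i) is much denser than B(:,j): skip ahead in Ai instead of
    // advancing one entry at a time.
    const int64_t Ai [ ] = { 0, 1, 2, 4, 5, 7, 9, 12, 15 } ;
    int64_t pa = 0, pa_end = 9 ;
    int64_t ib = 9 ;                                // next row index in B(:,j)
    pa = trim_search (ib, Ai, pa + 1, pa_end) ;     // lands on Ai [6] == 9
    printf ("pa = %" PRId64 ", Ai [pa] = %" PRId64 "\n", pa, Ai [pa]) ;
    return 0 ;
}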
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_int64 +// A'*B function: GB_AdotB__times_div_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_int8.c new file mode 100644 index 0000000000..097a818d18 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_int8: hard-coded C=A*B 
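// The integer kernels (this int8 version and the other integer types in the
// patch) use an IDIV macro in place of the plain "/" of the fp32/fp64
// kernels, so an integer division by zero cannot trap.  IDIV itself is
// defined in the GraphBLAS headers and is not shown here; the stand-in below
// is only a guess at a guarded division, and its divide-by-zero result may
// differ from the real macro.

#include <stdint.h>

static inline int8_t idiv_sketch_int8 (int8_t x, int8_t y)
{
    // avoid the undefined behavior of x/0 for signed integers
    return (y == 0) ? (int8_t) 0 : (int8_t) (x / y) ;
}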
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_int8 +// A'*B function: GB_AdotB__times_div_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint16.c new file mode 100644 index 0000000000..104c8422b3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_uint16: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_uint16 +// A'*B function: GB_AdotB__times_div_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag 
if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint32.c new file mode 100644 index 0000000000..9220bc742f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_uint32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_uint32 +// A'*B function: GB_AdotB__times_div_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag 
if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint64.c new file mode 100644 index 0000000000..a604c65c88 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_uint64: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_uint64 +// A'*B function: GB_AdotB__times_div_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag 
if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_div_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint8.c new file mode 100644 index 0000000000..19a54d68eb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_div_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_div_uint8: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_div_uint8 +// A'*B function: GB_AdotB__times_div_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? IDIV(bkj,aik) : IDIV(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? IDIV(bkj,aik) : IDIV(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
IDIV(bkj,aik) : IDIV(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_div_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? IDIV(bkj,aki) : IDIV(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_first_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_fp32.c new file mode 100644 index 0000000000..7cc9df42ed --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_fp32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_fp32 +// A'*B function: GB_AdotB__times_first_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, 
Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + 
if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // 
A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_first_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_fp64.c new file mode 100644 index 0000000000..83fec69242 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_fp64 +// A'*B function: GB_AdotB__times_first_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
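+    // For this times monoid, the saxpy-style phase below uses w as a dense
+    // accumulator for one column of C at a time.  In the unmasked case, w is
+    // first set to the multiplicative identity 1 over the pattern of C(:,j),
+    // one term is then multiplied in per entry of B(:,j), and the column is
+    // gathered back into Cx.  Rough sketch (illustrative only, not part of
+    // the generated template):
+    //
+    //      for (p in pattern of C(:,j))  w [Ci [p]] = 1 ;         // identity
+    //      for (each B(k,j)) for (each A(i,k)) w [i] *= Ax [pa] ; // times
+    //      for (p in pattern of C(:,j))  Cx [p] = w [Ci [p]] ;    // gather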
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_first_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_int16.c new file mode 100644 index 0000000000..b1e36f736c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_int16 +// A'*B function: GB_AdotB__times_first_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
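+    // In the masked phase below, the Flag workspace encodes three states for
+    // each row index i of the current column j: 0 means C(i,j) is not
+    // allowed by Mask(:,j) and the update is skipped, a positive value means
+    // the entry is allowed but has not received a term yet, and -1 means
+    // w [i] already holds a partial product.  Sketch of one update
+    // (illustrative only, not part of the generated template):
+    //
+    //      if (Flag [i] == 0) continue ;                      // not in Mask
+    //      if (Flag [i] > 0)  { w [i] = t ; Flag [i] = -1 ; } // first term
+    //      else               { w [i] *= t ; }                // times update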
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_first_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_int32.c new file mode 100644 index 0000000000..5b0b45fd6e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_int32 +// A'*B function: GB_AdotB__times_first_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
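+    // When compiled with WITH_ZOMBIES, the masked phase below keeps the
+    // pattern of C identical to the Mask: entries of the Mask that receive
+    // no contribution from A*B become zombies instead of being squeezed out
+    // of the column.  Sketch of the encoding used in the gather step
+    // (illustrative only, not part of the generated template):
+    //
+    //      Cx [p] = 1 ;            // identity value for the zombie
+    //      Ci [p] = FLIP (i) ;     // flipped row index marks the zombie
+    //      C->nzombies++ ;         // zombies are pruned later
+    //
+    // A matrix that acquires zombies is then placed on the pending-work
+    // queue via GB_queue_insert so the zombies can be removed before C is
+    // used again.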
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_first_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_int64.c new file mode 100644 index 0000000000..0954d2f983 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_int64 +// A'*B function: GB_AdotB__times_first_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_first_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_int8.c new file mode 100644 index 0000000000..3fbfdb1c58 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_int8 +// A'*B function: GB_AdotB__times_first_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
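// [Editorial sketch -- not part of the generated file or of this patch.]
// The semiring comments above ("Multiply: t = (aik)", "Add: cij *= t",
// identity 1) reduce to the scalar helper below. The function name and its
// arguments are illustrative only. Because MULT is FIRST, the value of
// B(k,j) never affects the result, which is why the loops in this file read
// bkj but do not use it, and why the identity 1 can seed every product.
static inline int8_t times_first_add_int8 (int8_t cij, int8_t aki, int8_t bkj)
{
    (void) bkj ;            // FIRST ignores its second argument
    int8_t t = aki ;        // MULT: t = FIRST (aki, bkj) = aki
    return (cij * t) ;      // ADD:  cij = TIMES (cij, t); TIMES(cij,1) == cij
}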
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_first_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint16.c new file mode 100644 index 0000000000..7829c4747e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_uint16 +// A'*B function: GB_AdotB__times_first_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
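// [Editorial sketch -- not part of the generated file or of this patch.]
// The masked outer-product phase in these kernels keeps a three-state
// per-row Flag: 0 means row i is not in Mask(:,j); +1 means it is in the
// mask but C(i,j) has not yet received a value; -1 means w[i] already holds
// a partial product. Rows still at +1 when the column finishes become
// zombies in the WITH_ZOMBIES variant, stored with a flipped row index.
// The helper below restates that state machine; its name and arguments are
// illustrative only, and it assumes the stdint types pulled in by GB.h.
static inline void accumulate_masked_entry
(
    int8_t *Flag,       // per-row state, assumed cleared to 0 for this column
    int64_t i,          // row index of A(i,k)
    uint16_t t,         // MULT result for this (i,k,j) term
    uint16_t *w         // dense workspace holding the partial column C(:,j)
)
{
    if (Flag [i] == 0) return ;     // Mask(i,j) not present: skip the work
    if (Flag [i] > 0)
    {
        Flag [i] = -1 ;             // first term seen for C(i,j)
        w [i] = t ;
    }
    else
    {
        w [i] *= t ;                // later terms: ADD into the running product
    }
}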
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_first_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint32.c new file mode 100644 index 0000000000..bff04ca863 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_uint32 +// A'*B function: GB_AdotB__times_first_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
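// [Editorial sketch -- not part of the generated file or of this patch.]
// The dot-product (A'*B) kernels in these files intersect the patterns of
// A(:,i) and B(:,j). When the two columns have comparable sparsity they use
// the plain two-pointer merge sketched below (the function name is
// illustrative only); when one column has more than 32 times the entries of
// the other, the generated code instead steps through the sparser column and
// advances through the denser one with GB_BINARY_TRIM_SEARCH, skipping runs
// of non-matching indices rather than visiting them one at a time. Assumes
// the stdint/stdbool types pulled in by GB.h.
static inline uint32_t dot_times_first_uint32
(
    const int64_t *Ai, const uint32_t *Ax, int64_t pa, int64_t pa_end,
    const int64_t *Bi, const uint32_t *Bx, int64_t pb, int64_t pb_end,
    bool *exists        // set true if the two patterns intersect
)
{
    (void) Bx ;                         // FIRST ignores B's values
    uint32_t cij = 1 ;                  // ADD identity
    *exists = false ;
    while (pa < pa_end && pb < pb_end)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if      (ia < ib) pa++ ;        // A(ia,i) has no matching B entry
        else if (ib < ia) pb++ ;        // B(ib,j) has no matching A entry
        else
        {
            // indices match: apply MULT then ADD, as in the MERGE macro
            cij = (*exists) ? (uint32_t) (cij * Ax [pa]) : Ax [pa] ;
            *exists = true ;
            pa++ ; pb++ ;
        }
    }
    return (cij) ;                      // meaningful only if *exists is true
}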
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_first_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint64.c new file mode 100644 index 0000000000..8f7aeabfc4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_uint64 +// A'*B function: GB_AdotB__times_first_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_first_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint8.c new file mode 100644 index 0000000000..5b28ab8285 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_first_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_first_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_first_uint8 +// A'*B function: GB_AdotB__times_first_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_first_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
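The GB_AdotB kernels above pick a merge strategy per column pair: special cases when either column is dense, a binary "trim" search (GB_BINARY_TRIM_SEARCH) when one column has more than 32 times as many entries as the other, and otherwise a plain two-pointer merge. Below is a minimal, self-contained sketch of that two-pointer case for the TIMES-monoid / FIRST-multiplier semiring over uint8_t; the function name and calling convention are illustrative assumptions, not the library's API.

#include <stdint.h>
#include <stdbool.h>

/* dot product of two sparse vectors with sorted row indices, under the
   TIMES monoid with the FIRST multiplier: t = aki, cij *= t.
   Returns false if no row index is shared (C(i,j) not in the pattern). */
static bool sparse_dot_times_first_uint8
(
    const int64_t *Ai, const uint8_t *Ax, int64_t anz,   /* A(:,i) indices/values */
    const int64_t *Bi, const uint8_t *Bx, int64_t bnz,   /* B(:,j) indices/values */
    uint8_t *cij                                          /* result, if it exists */
)
{
    (void) Bx ;                             /* FIRST ignores the B(k,j) values */
    bool cij_exists = false ;
    int64_t pa = 0, pb = 0 ;
    while (pa < anz && pb < bnz)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib)
        {
            pa++ ;                          /* A(ia,i) has no matching B(ia,j) */
        }
        else if (ib < ia)
        {
            pb++ ;                          /* B(ib,j) has no matching A(ib,i) */
        }
        else                                /* ia == ib == k */
        {
            uint8_t t = Ax [pa] ;           /* multiply: FIRST(aki,bkj) = aki */
            if (cij_exists)
            {
                (*cij) *= t ;               /* add: TIMES monoid accumulation */
            }
            else
            {
                (*cij) = t ;                /* first entry seen for C(i,j) */
                cij_exists = true ;
            }
            pa++ ;
            pb++ ;
        }
    }
    return (cij_exists) ;
}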
a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp32.c new file mode 100644 index 0000000000..7ddba86a3e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_fp32 +// A'*B function: GB_AdotB__times_iseq_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + 
// B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
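For the TIMES_ISEQ semirings generated here, the "multiply" is a comparison that yields 1 or 0 and the "add" is a running product with identity 1 (so cij *= 1 leaves cij unchanged); a dot product is therefore 1 exactly when the two columns agree on every entry visited. A minimal sketch of the dense-times-dense case in fp32 under those same definitions follows; the function name is an illustrative assumption.

#include <stddef.h>

/* dense dot product under the TIMES_ISEQ_FP32 semiring:
   cij = prod_k (a [k] == b [k]), starting from the identity 1 */
static float dense_dot_times_iseq_fp32 (const float *a, const float *b, size_t n)
{
    float cij = 1.0f ;                         /* identity of the TIMES monoid */
    for (size_t k = 0 ; k < n ; k++)
    {
        float t = (float) (a [k] == b [k]) ;   /* ISEQ multiplier: 1.0f if equal */
        cij *= t ;                             /* add: cij *= t */
    }
    return (cij) ;                             /* 1.0f iff the columns match entrywise */
}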
a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp64.c new file mode 100644 index 0000000000..2e5bc6ac7c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_fp64 +// A'*B function: GB_AdotB__times_iseq_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
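The masked GB_AxB kernels above all follow the same column pattern: scatter Mask(:,j) into the Flag workspace, accumulate (A * B(:,j)) only at flagged rows into the dense workspace w, then gather the result back along the mask pattern, turning mask entries that received no contribution into zombies whose row indices are stored flipped so a later pass can prune them. Below is a minimal, self-contained sketch of one such column for the fp64 ISEQ case. It treats the mask as purely structural, and the function name, flip encoding, and calling convention are illustrative assumptions; the generated kernel additionally casts mask values to boolean, skips empty columns, and places C in a queue when zombies are created.

#include <stdint.h>

static inline int64_t flip (int64_t i) { return (-i - 2) ; }  /* illustrative stand-in
                                  for the FLIP macro; note flip (flip (i)) == i */

/* compute C(:,j) = (A * B(:,j)) .* Mask(:,j); C(:,j) has the pattern of Mask(:,j).
   Flag and w have length C->nrows; Flag is all zero on entry and on exit. */
static int64_t masked_column_times_iseq_fp64
(
    const int64_t *Ap, const int64_t *Ai, const double *Ax,  /* A in CSC form */
    const int64_t *Bi, const double *Bx, int64_t bjnz,       /* B(:,j) entries */
    const int64_t *Mi, int64_t mjnz,                         /* Mask(:,j) row indices */
    int8_t *Flag, double *w,                                 /* workspaces */
    int64_t *Ci, double *Cx                                  /* C(:,j) output */
)
{
    /* scatter: Flag [i] = 1 for every row i in Mask(:,j) */
    for (int64_t p = 0 ; p < mjnz ; p++) Flag [Mi [p]] = 1 ;

    /* accumulate into w, visiting only flagged rows */
    for (int64_t pb = 0 ; pb < bjnz ; pb++)
    {
        int64_t k = Bi [pb] ;
        double bkj = Bx [pb] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            int64_t i = Ai [pa] ;
            if (Flag [i] == 0) continue ;                       /* not in the mask */
            double t = (double) (Ax [pa] == bkj) ;              /* multiply: ISEQ */
            if (Flag [i] > 0) { Flag [i] = -1 ; w [i] = t ; }   /* first contribution */
            else              { w [i] *= t ; }                  /* add: TIMES */
        }
    }

    /* gather along the mask pattern; untouched mask entries become zombies */
    int64_t nzombies = 0 ;
    for (int64_t p = 0 ; p < mjnz ; p++)
    {
        int64_t i = Mi [p] ;
        if (Flag [i] < 0)
        {
            Ci [p] = i ;            /* live entry computed from A*B */
            Cx [p] = w [i] ;
        }
        else
        {
            Ci [p] = flip (i) ;     /* zombie: in the Mask but not in A*B */
            Cx [p] = 1 ;            /* identity value as a placeholder */
            nzombies++ ;
        }
        Flag [i] = 0 ;              /* restore the Flag workspace to all zero */
    }
    return (nzombies) ;
}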
a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int16.c new file mode 100644 index 0000000000..b959d7aa2c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_int16 +// A'*B function: GB_AdotB__times_iseq_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int32.c new file mode 100644 index 0000000000..935714c041 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_int32 +// A'*B function: GB_AdotB__times_iseq_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int64.c new file mode 100644 index 0000000000..6f83b10f97 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_int64 +// A'*B function: GB_AdotB__times_iseq_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int8.c new file mode 100644 index 0000000000..a65a738f2e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_int8 +// A'*B function: GB_AdotB__times_iseq_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
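For reference, the Multiply and Add listed in the header above make each entry of C a chain of equality indicators: every aik == bkj test yields 0 or 1 in the value type, and the TIMES monoid (identity 1) multiplies the results, so an entry ends up 1 exactly when all contributing pairs agree and 0 otherwise. A minimal scalar sketch of that fold, assuming int8_t values as in this file (the helper name is illustrative, not part of the generated kernel):

#include <stdint.h>

// illustrative only: fold one A(k,i), B(k,j) pair into a running cij under
// the TIMES_ISEQ_INT8 semiring (multiply = ISEQ, add = TIMES, identity = 1),
// mirroring what the w [i] and cij updates below do in place
static inline int8_t times_iseq_int8_fold (int8_t cij, int8_t aki, int8_t bkj)
{
    int8_t t = (int8_t) (aki == bkj) ;  // ISEQ multiply: 1 if equal, else 0
    return ((int8_t) (cij * t)) ;       // TIMES add: any mismatch forces 0
}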
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint16.c new file mode 100644 index 0000000000..2c22207d6e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_uint16 +// A'*B function: GB_AdotB__times_iseq_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
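In the masked branch below, the int8_t Flag workspace acts as a three-state marker per row of C, and the scatter/gather loops are easier to follow with the states spelled out. A small sketch of the convention the code relies on (the enumerator names are illustrative; scatter_mask is assumed to store a positive value for rows present in Mask(:,j)):

// illustrative only: per-row states of the Flag workspace in the masked loops
enum
{
    ROW_NOT_IN_MASK =  0,   // Mask(i,j) is zero or absent: row i is skipped
    ROW_MASKED_NEW  =  1,   // Mask(i,j) is true, C(i,j) not yet started
    ROW_MASKED_LIVE = -1    // Mask(i,j) is true and w [i] holds C(i,j) so far
} ;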
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
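// (in the compressed-sparse-column format used throughout this package, Cp
// has n+1 entries; the final assignment below records the total entry count
// so that Cp [j+1] - Cp [j] gives the number of entries in each column of C)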
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint32.c new file mode 100644 index 0000000000..2e1ba06dd2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_uint32 +// A'*B function: GB_AdotB__times_iseq_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
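The masked branch of this kernel can create zombies: entries whose position is copied from the Mask into C but which receive no contribution from A*B. Their row indices are stored negated via FLIP so that a later GB_wait can prune them, and GB_queue_insert places C on the queue of matrices with pending work. A minimal sketch of the flip convention, assumed here to mirror the macros in GB.h (the definitions below are illustrative, not a copy of that header):

// illustrative only: an index i >= 0 denotes a live entry, a flipped
// (negative) index denotes a zombie; FLIP is assumed to be its own inverse
#define FLIP(i)        (-(i) - 2)      // maps 0,1,2,... to -2,-3,-4,...
#define IS_FLIPPED(i)  ((i) < 0)
#define UNFLIP(i)      (IS_FLIPPED (i) ? FLIP (i) : (i))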
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
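// (the entries gathered above may include explicit zeros: with the ISEQ
// multiplier a single mismatched pair forces the whole product to 0, but the
// entry is still kept in the pattern of C; closing Cp below makes that
// pattern valid regardless of the stored values)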
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint64.c new file mode 100644 index 0000000000..16be7ffa74 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_uint64 +// A'*B function: GB_AdotB__times_iseq_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
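The companion dot-product kernel GB_AdotB__times_iseq_uint64 in the second half of this file chooses among six strategies for each C(i,j): both vectors dense, one of them dense, a two-pointer merge when A(:,i) and B(:,j) have comparable entry counts, and a trimmed binary search (GB_BINARY_TRIM_SEARCH) when one has more than 32 times the entries of the other. A minimal sketch of the balanced merge for this semiring, with illustrative names and none of the Mask, flip, or zombie handling:

#include <stdint.h>
#include <stdbool.h>

// illustrative only: cij = A(:,i)'*B(:,j) over two sorted index lists, using
// the TIMES_ISEQ_UINT64 semiring; returns false if the columns do not overlap
static bool dot_times_iseq_uint64
(
    uint64_t *cij,
    const int64_t *Ai, const uint64_t *Ax, int64_t anz,    // column A(:,i)
    const int64_t *Bi, const uint64_t *Bx, int64_t bnz     // column B(:,j)
)
{
    bool cij_exists = false ;
    uint64_t c = 1 ;                                // TIMES identity
    for (int64_t pa = 0, pb = 0 ; pa < anz && pb < bnz ; )
    {
        if      (Ai [pa] < Bi [pb]) pa++ ;          // A(k,i) has no partner
        else if (Bi [pb] < Ai [pa]) pb++ ;          // B(k,j) has no partner
        else
        {
            uint64_t t = (Ax [pa] == Bx [pb]) ;     // ISEQ multiply
            c *= t ;                                // TIMES add
            cij_exists = true ;
            pa++ ; pb++ ;
        }
    }
    if (cij_exists) (*cij) = c ;
    return (cij_exists) ;
}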
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint8.c new file mode 100644 index 0000000000..d295679aaa --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_iseq_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_iseq_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_iseq_uint8 +// A'*B function: GB_AdotB__times_iseq_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik == bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
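The semiring in this file reduces to two scalar operations: the multiply is the equality test ISEQ (producing 0 or 1 in uint8_t) and the add is TIMES with identity 1, so a reduction stays 1 only while every compared pair is equal. A small self-contained example of that arithmetic, separate from the generated kernel itself:

    #include <stdint.h>
    #include <stdio.h>

    // "multiply" of the TIMES_ISEQ_UINT8 semiring: t = (aik == bkj), 0 or 1
    static uint8_t iseq_uint8 (uint8_t aik, uint8_t bkj)
    {
        return (aik == bkj) ;
    }

    // "add" (monoid) of the semiring: cij *= t, with identity 1
    static uint8_t times_uint8 (uint8_t cij, uint8_t t)
    {
        return ((uint8_t) (cij * t)) ;
    }

    int main (void)
    {
        // dense dot product of two length-4 vectors under this semiring
        uint8_t a [4] = { 2, 5, 5, 9 } ;
        uint8_t b [4] = { 2, 5, 5, 9 } ;
        uint8_t cij = 1 ;                   // the monoid identity
        for (int k = 0 ; k < 4 ; k++)
        {
            cij = times_uint8 (cij, iseq_uint8 (a [k], b [k])) ;
        }
        printf ("cij = %d\n", cij) ;        // 1: every pair is equal
        b [2] = 6 ;
        cij = 1 ;
        for (int k = 0 ; k < 4 ; k++)
        {
            cij = times_uint8 (cij, iseq_uint8 (a [k], b [k])) ;
        }
        printf ("cij = %d\n", cij) ;        // 0: one mismatch zeroes the product
        return (0) ;
    }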
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik == bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_iseq_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki == bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki == bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isge_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_fp32.c new file mode 100644 index 0000000000..5a6ad5684a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_fp32 +// A'*B function: GB_AdotB__times_isge_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
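In the dot-product kernels, the MERGE macro folds one matched pair into cij: the first match defines cij, and later matches are combined with the TIMES monoid. A compact standalone version of that control flow, using the ISGE multiply of the fp32 file that follows (merge_pair is an illustrative name, not a library function):

    #include <stdbool.h>
    #include <stdio.h>

    // fold one matched pair (aki, bkj) into the running dot product cij:
    // the first match initializes cij, later matches use the TIMES monoid.
    static void merge_pair (float aki, float bkj, bool *cij_exists, float *cij)
    {
        float t = (aki >= bkj) ;        // multiply: ISGE, gives 0.0 or 1.0
        if (*cij_exists)
        {
            (*cij) *= t ;               // add: cij = cij * t
        }
        else
        {
            *cij_exists = true ;
            *cij = t ;                  // first entry in the intersection
        }
    }

    int main (void)
    {
        bool cij_exists = false ;
        float cij = 0 ;
        // two matched pairs in the intersection of A(:,i) and B(:,j)
        merge_pair (4.0f, 3.0f, &cij_exists, &cij) ;    // 4 >= 3: t = 1
        merge_pair (1.0f, 2.0f, &cij_exists, &cij) ;    // 1 >= 2: t = 0
        if (cij_exists)
        {
            printf ("C(i,j) = %g\n", cij) ; // 0: the entry exists but is zero
        }
        return (0) ;
    }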
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + 
// B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isge_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_fp64.c new file mode 100644 index 0000000000..6c61aef6fc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_fp64 +// A'*B function: GB_AdotB__times_isge_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
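In the masked gather/scatter path below, the byte array Flag holds one of three states per row: 0 means Mask(i,j) is not present, 1 means it is present but C(i,j) has not been computed yet, and -1 means a value for C(i,j) is already in the workspace w. The following small sketch walks one column through those states; the row lists and update values are made up purely for illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define N 6

    int main (void)
    {
        // Flag [i]: 0 = Mask(i,j) not present, 1 = present but C(i,j) not
        // yet seen, -1 = C(i,j) already started in the workspace w
        int8_t Flag [N] = { 0 } ;
        double w [N] ;

        int64_t mask_rows [ ] = { 1, 3, 4 } ;       // pattern of Mask(:,j)
        for (int k = 0 ; k < 3 ; k++) Flag [mask_rows [k]] = 1 ;

        // simulate updates t = (aik >= bkj) arriving for rows 3, 4, 3, 0
        int64_t rows [ ] = { 3, 4, 3, 0 } ;
        double  t    [ ] = { 1, 1, 0, 1 } ;
        for (int k = 0 ; k < 4 ; k++)
        {
            int64_t i = rows [k] ;
            if (Flag [i] == 0) continue ;           // row 0 not in the Mask
            if (Flag [i] > 0)
            {
                Flag [i] = -1 ;                     // first time C(i,j) seen
                w [i] = t [k] ;
            }
            else
            {
                w [i] *= t [k] ;                    // accumulate with TIMES
            }
        }

        // gather: rows still flagged 1 got no update (zombies in the
        // WITH_ZOMBIES variant of the generated code)
        for (int k = 0 ; k < 3 ; k++)
        {
            int64_t i = mask_rows [k] ;
            if (Flag [i] < 0) printf ("C(%ld,j) = %g\n", (long) i, w [i]) ;
            else              printf ("C(%ld,j): no contribution from A*B\n", (long) i) ;
            Flag [i] = 0 ;                          // reset for the next column
        }
        return (0) ;
    }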
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isge_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int16.c new file mode 100644 index 0000000000..3d4e5902e7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_int16 +// A'*B function: GB_AdotB__times_isge_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
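Each dot-product kernel chooses one of several per-entry strategies from the sizes of the two column patterns; the factor of 32 is the cutoff between a plain two-pointer merge and skipping through the denser pattern with a binary search. The selection logic restated as a tiny standalone function (dot_strategy is an illustrative name only, with the constants taken from the generated code):

    #include <stdint.h>
    #include <stdio.h>

    // nrows: column length; ainz, bjnz: entries in A(:,i) and B(:,j)
    static const char *dot_strategy (int64_t nrows, int64_t ainz, int64_t bjnz)
    {
        if (ainz == nrows && bjnz == nrows)
            return ("both dense: direct loop over all k") ;
        if (ainz == nrows)
            return ("A(:,i) dense: loop over the entries of B(:,j) only") ;
        if (bjnz == nrows)
            return ("B(:,j) dense: loop over the entries of A(:,i) only") ;
        if (ainz > 32 * bjnz)
            return ("binary-search A(:,i) to skip past each B(:,j) entry") ;
        if (bjnz > 32 * ainz)
            return ("binary-search B(:,j) to skip past each A(:,i) entry") ;
        return ("two-pointer merge of both patterns") ;
    }

    int main (void)
    {
        printf ("%s\n", dot_strategy (1000, 1000, 12)) ;  // A(:,i) dense
        printf ("%s\n", dot_strategy (1000, 900, 5)) ;    // 900 > 32*5: trim-search in A
        printf ("%s\n", dot_strategy (1000, 40, 30)) ;    // balanced: plain merge
        return (0) ;
    }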
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isge_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int32.c new file mode 100644 index 0000000000..7078d1df19 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_int32 +// A'*B function: GB_AdotB__times_isge_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
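When there is no Mask, the pattern of C has already been computed by GB_AxB_symbolic, so the numeric phase only clears the workspace w to the monoid identity (1) on that pattern, folds in the updates, and gathers the column back out. A reduced one-column sketch of that flow, with made-up row indices and update values, in the int32_t type of the file that follows:

    #include <stdint.h>
    #include <stdio.h>

    // one column of the unmasked path: the pattern Ci [Cp [j] .. Cp [j+1]-1]
    // is known in advance, so w only needs the identity on those rows.
    int main (void)
    {
        int64_t Ci [ ] = { 1, 4, 5 } ;      // rows present in C(:,j)
        int32_t w [8] ;
        int32_t Cx [3] ;

        // clear w to the monoid identity on the pattern of C(:,j)
        for (int p = 0 ; p < 3 ; p++) w [Ci [p]] = 1 ;

        // accumulate a few updates t = (aik >= bkj) landing on those rows
        int64_t rows [ ] = { 4, 1, 4 } ;
        int32_t t    [ ] = { 1, 0, 1 } ;
        for (int k = 0 ; k < 3 ; k++) w [rows [k]] *= t [k] ;

        // gather C(:,j) back out of the workspace
        for (int p = 0 ; p < 3 ; p++) Cx [p] = w [Ci [p]] ;
        for (int p = 0 ; p < 3 ; p++)
        {
            printf ("C(%ld,j) = %d\n", (long) Ci [p], Cx [p]) ;
        }
        return (0) ;
    }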
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isge_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int64.c new file mode 100644 index 0000000000..5a409f3725 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_int64 +// A'*B function: GB_AdotB__times_isge_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
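+    // Illustrative worked example (the values are made up for illustration,
+    // not part of the generated template): the "times_isge" semiring uses the
+    // comparison as the multiply and integer times as the add.  Each term is
+    // t = (aik >= bkj), which is 0 or 1, and terms are folded in with
+    // cij *= t, so cij ends up 1 only if every contributing pair satisfies
+    // A(i,k) >= B(k,j).  For two contributing k's:
+    //      A(i,k1) = 3, B(k1,j) = 2  ->  t = 1
+    //      A(i,k2) = 0, B(k2,j) = 5  ->  t = 0
+    //      cij = 1 * 1 * 0 = 0
+    // The identity 1 is safe because cij *= 1 leaves cij unchanged.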
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isge_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int8.c new file mode 100644 index 0000000000..482dad49e1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_int8 +// A'*B function: GB_AdotB__times_isge_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
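+    // Rough outline of the outer-product (gather/scatter) method used below,
+    // written as pseudo-C for orientation only; the real loops also handle
+    // the Mask and zombies:
+    //
+    //      for each column j:
+    //          for each entry B(k,j):
+    //              for each entry A(i,k):
+    //                  w [i] = w [i] "+" (A(i,k) "*" B(k,j)) ;  // accumulate
+    //          for each row i in the pattern of C(:,j):
+    //              Cx [p] = w [i] ;                             // gather
+    //
+    // where "*" is (aik >= bkj) and "+" is integer times for this semiring.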
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint16.c new file mode 100644 index 0000000000..cb2eae1ecc --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_uint16 +// A'*B function: GB_AdotB__times_isge_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
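+    // Note on the masked path below (a summary of the generated code): the
+    // Flag workspace records the state of row i for the current column j of
+    // the Mask:
+    //      Flag [i] == 0   Mask(i,j) is false or not present; the term is
+    //                      skipped
+    //      Flag [i] >  0   Mask(i,j) is true, but C(i,j) has not been started
+    //      Flag [i] <  0   C(i,j) has been started and is held in w [i]
+    // With WITH_ZOMBIES defined, C takes the pattern of the Mask; entries of
+    // the Mask that never receive a value become zombies (row index FLIP(i)),
+    // and GB_queue_insert places C in the queue so they can be dealt with
+    // later.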
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint32.c new file mode 100644 index 0000000000..7d200fdc60 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_uint32 +// A'*B function: GB_AdotB__times_isge_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
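+    // Note on the dot-product variant GB_AdotB__times_isge_uint32 further
+    // below (descriptive only): each cij = A(:,i)'*B(:,j) is computed by one
+    // of several strategies, chosen from the entry counts of the two vectors
+    // (ainz and bjnz) and the vector length (nrows):
+    //      both dense            : a straight loop over all nrows entries
+    //      one dense, one sparse : index directly into the dense vector
+    //      ainz > 32 * bjnz      : two-pointer merge, but advance through
+    //                              A(:,i) with GB_BINARY_TRIM_SEARCH
+    //      bjnz > 32 * ainz      : the symmetric case, trimming B(:,j)
+    //      otherwise             : an ordinary two-pointer merge
+    // The MERGE macro applies the semiring to one matching pair of entries.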
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint64.c new file mode 100644 index 0000000000..abe3b6298a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_uint64 +// A'*B function: GB_AdotB__times_isge_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
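+    // Note on the unmasked path below (descriptive only): the pattern of C
+    // has already been computed by GB_AxB_symbolic, so the workspace w is
+    // first cleared to the semiring identity (w [Ci [p]] = 1) for every row
+    // in the pattern of C(:,j), each term t = (aik >= bkj) is folded in with
+    // w [i] *= t, and Cx [p] = w [Ci [p]] then gathers the finished column.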
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint8.c new file mode 100644 index 0000000000..1181edb950 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isge_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isge_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isge_uint8 +// A'*B function: GB_AdotB__times_isge_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik >= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik >= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isge_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki >= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki >= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp32.c new file mode 100644 index 0000000000..66dff6c518 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_fp32 +// A'*B function: GB_AdotB__times_isgt_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
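+ // A brief note on this float variant: in C the comparison (aik > bkj)
+ // evaluates to the integer 0 or 1, which converts to exactly 0.0f or 1.0f
+ // here, so the running TIMES product stays an exact product of zeros and
+ // ones. Comparisons involving NaN are false, so a NaN entry in A or B
+ // contributes a 0.0f factor rather than propagating NaN into cij.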
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if 
it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp64.c new file mode 100644 index 0000000000..89da2ca158 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_fp64 +// A'*B function: GB_AdotB__times_isgt_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int16.c new file mode 100644 index 0000000000..1256f3800c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_int16 +// A'*B function: GB_AdotB__times_isgt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
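+ // A brief note on this int16_t variant: the ISGT multiply (aik > bkj) yields
+ // only 0 or 1, so the running TIMES product in w [i] and cij stays 0 or 1
+ // and cannot overflow int16_t, even for long dot products.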
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int32.c new file mode 100644 index 0000000000..481ddad090 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_int32 +// A'*B function: GB_AdotB__times_isgt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
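+ // As a small worked example of this semiring (illustration only): the
+ // multiply op returns t = (aik > bkj), which is always 0 or 1, and the add
+ // op folds these results together with *= starting from the identity 1, so
+ // an entry ends up 1 only if every contributing comparison holds, and 0
+ // otherwise; e.g. (4 > 2) * (7 > 9) = 1 * 0 = 0.  Note that "clear w" in
+ // the unmasked path below sets w to this identity 1, not to 0.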
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int64.c new file mode 100644 index 0000000000..e2cd680b40 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_int64 +// A'*B function: GB_AdotB__times_isgt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
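+ // In the dot-product method below (GB_AdotB__times_isgt_int64), cij_exists
+ // records whether A(:,i) and B(:,j) have any row indices in common: C(i,j)
+ // is created only in that case, and it is kept even when the accumulated
+ // product is 0, so a failed comparison still yields an explicit zero entry.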
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int8.c new file mode 100644 index 0000000000..40f18210e1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_int8 +// A'*B function: GB_AdotB__times_isgt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
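+ // In the masked outer-product path below, when WITH_ZOMBIES is defined,
+ // every entry of Mask(:,j) that receives no contribution from A*B is kept
+ // as a zombie: its value is set to 1, its row index is stored as FLIP (i),
+ // and C->nzombies is incremented; C is then placed in the queue
+ // (GB_queue_insert) if it ends up with any zombies.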
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint16.c new file mode 100644 index 0000000000..dd748e2eb7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_uint16 +// A'*B function: GB_AdotB__times_isgt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
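+ // In the masked path below, Flag [i] appears to encode three states:
+ // 0 means Mask(i,j) is not present (the entry is skipped), a positive value
+ // means Mask(i,j) is present but C(i,j) has not yet received a value, and
+ // -1 means w [i] already holds a partial product for C(i,j), so later terms
+ // are folded in with *= .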
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint32.c new file mode 100644 index 0000000000..1361c5ce37 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_uint32 +// A'*B function: GB_AdotB__times_isgt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
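+ // In the dot-product method below (GB_AdotB__times_isgt_uint32), each
+ // C(i,j) is computed by merging the patterns of A(:,i) and B(:,j): fully
+ // dense vectors are handled with direct indexing, and when one pattern has
+ // more than 32 times as many entries as the other, the denser one is
+ // advanced by binary search (GB_BINARY_TRIM_SEARCH) while the sparser one
+ // is scanned entry by entry; otherwise a simple two-way merge is used.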
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint64.c new file mode 100644 index 0000000000..9037b7bd1f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_uint64 +// A'*B function: GB_AdotB__times_isgt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
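The semiring listed above is easy to misread: the "multiply" is a comparison and the "add" is ordinary multiplication, so a reduction collapses to 1 only when aik > bkj holds for every multiplied pair, and to 0 otherwise. A minimal, self-contained sketch of that behavior over dense toy vectors (not the GraphBLAS data structures used by the kernel below):

#include <stdint.h>
#include <stdio.h>

// reduce two dense vectors a and b of length n with the TIMES_ISGT semiring:
// multiply: t = (a [k] > b [k]), add: cij *= t, identity: 1
static uint64_t times_isgt_reduce (const uint64_t *a, const uint64_t *b, int n)
{
    uint64_t cij = 1 ;                      // identity of the TIMES monoid
    for (int k = 0 ; k < n ; k++)
    {
        uint64_t t = (a [k] > b [k]) ;      // ISGT "multiply": 0 or 1
        cij *= t ;                          // TIMES "add"
    }
    return (cij) ;
}

int main (void)
{
    uint64_t a [3] = { 5, 7, 9 } ;
    uint64_t b [3] = { 1, 2, 3 } ;
    printf ("%llu\n", (unsigned long long) times_isgt_reduce (a, b, 3)) ;  // 1
    b [1] = 7 ;                             // a [1] > b [1] no longer holds
    printf ("%llu\n", (unsigned long long) times_isgt_reduce (a, b, 3)) ;  // 0
    return (0) ;
}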
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint8.c new file mode 100644 index 0000000000..6f35637f9c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isgt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isgt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isgt_uint8 +// A'*B function: GB_AdotB__times_isgt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik > bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
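In the masked branch below, entries that are in the Mask pattern but never produced by A*B are kept as "zombies": their slot in C is retained, the row index stored in C->i is flipped to a negative code, and C->nzombies is incremented so a later assembly step can prune them. The sketch uses a hypothetical flip() stand-in for the FLIP macro defined in GB.h (the real macro may differ); only the idea of a self-inverse negative encoding matters here.

#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <stdio.h>

// hypothetical stand-in for FLIP: a self-inverse map that sends a valid row
// index (>= 0) to a negative "zombie" code and back again
static inline int64_t flip (int64_t i) { return (-i - 2) ; }
static inline bool is_zombie (int64_t i) { return (i < 0) ; }

int main (void)
{
    int64_t Ci [4] = { 0, 3, 5, 9 } ;       // row indices taken from Mask(:,j)
    bool live [4] = { true, false, true, false } ;  // produced by A*B?
    int64_t nzombies = 0 ;
    for (int p = 0 ; p < 4 ; p++)
    {
        if (!live [p]) { Ci [p] = flip (Ci [p]) ; nzombies++ ; }
    }
    for (int p = 0 ; p < 4 ; p++)
    {
        int64_t i = Ci [p] ;
        printf ("slot %d: row %" PRId64 "%s\n", p,
            is_zombie (i) ? flip (i) : i, is_zombie (i) ? " (zombie)" : "") ;
    }
    printf ("nzombies = %" PRId64 "\n", nzombies) ;
    return (0) ;
}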
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik > bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isgt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki > bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki > bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isle_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_fp32.c new file mode 100644 index 0000000000..8aafc7fcee --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isle_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isle_fp32 +// A'*B function: GB_AdotB__times_isle_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
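The unmasked branch below is a Gustavson-style column update: the dense workspace w is set to the TIMES identity over the precomputed pattern of C(:,j), each entry B(k,j) scales it by the ISLE comparison against column A(:,k), and the result is gathered back into Cx. A toy sketch with hand-built CSC arrays (assumed small sizes, not GraphBLAS objects):

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    // A is 3-by-2 in CSC form: column 0 holds rows {0,2}, column 1 holds row {1}
    int64_t Ap [3] = { 0, 2, 3 } ;
    int64_t Ai [3] = { 0, 2, 1 } ;
    float   Ax [3] = { 1, 4, 2 } ;
    // one sparse column B(:,j) with B(0,j) = 3 and B(1,j) = 2
    int64_t Bi [2] = { 0, 1 } ;
    float   Bx [2] = { 3, 2 } ;
    // precomputed pattern of C(:,j) is all three rows; w is the dense workspace
    int64_t Ci [3] = { 0, 1, 2 } ;
    float   w [3], Cx [3] ;

    for (int p = 0 ; p < 3 ; p++) w [Ci [p]] = 1 ;          // clear w to the identity
    for (int pb = 0 ; pb < 2 ; pb++)                        // accumulate B(:,j)
    {
        int64_t k = Bi [pb] ;
        float bkj = Bx [pb] ;
        for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
        {
            w [Ai [pa]] *= (Ax [pa] <= bkj) ;               // ISLE multiply, TIMES add
        }
    }
    for (int p = 0 ; p < 3 ; p++) Cx [p] = w [Ci [p]] ;     // gather C(:,j)
    for (int p = 0 ; p < 3 ; p++) printf ("C(%d,j) = %g\n", p, Cx [p]) ;
    return (0) ;
}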
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + 
// B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isle_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_fp64.c new file mode 100644 index 0000000000..9d1b458115 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isle_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isle_fp64 +// A'*B function: GB_AdotB__times_isle_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
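The dot-product kernel below picks its merge strategy from the relative sparsity of A(:,i) and B(:,j): when one column has more than 32 times the entries of the other, the pointer into the denser column jumps ahead with a binary search (the role played by GB_BINARY_TRIM_SEARCH) instead of stepping one entry at a time. A simplified sketch of that galloping intersection, with an illustrative lower_bound helper in place of the macro:

#include <stdint.h>
#include <stdio.h>

// first position p in Ai [p .. pa_end-1] with Ai [p] >= target (Ai is sorted)
static int64_t lower_bound (const int64_t *Ai, int64_t p, int64_t pa_end,
    int64_t target)
{
    while (p < pa_end)
    {
        int64_t mid = p + (pa_end - p) / 2 ;
        if (Ai [mid] < target) p = mid + 1 ; else pa_end = mid ;
    }
    return (p) ;
}

int main (void)
{
    int64_t Ai [8] = { 0, 1, 2, 4, 6, 8, 9, 11 } ;   // "dense-ish" column A(:,i)
    int64_t Bi [2] = { 4, 9 } ;                      // much sparser column B(:,j)
    int64_t pa = 0, pa_end = 8, pb = 0, pb_end = 2, matches = 0 ;
    while (pa < pa_end && pb < pb_end)
    {
        if (Ai [pa] < Bi [pb])
        {
            pa = lower_bound (Ai, pa + 1, pa_end, Bi [pb]) ;  // gallop ahead in A
        }
        else if (Bi [pb] < Ai [pa])
        {
            pb++ ;                                            // advance sparse side
        }
        else
        {
            printf ("common row %lld\n", (long long) Ai [pa]) ;
            matches++ ; pa++ ; pb++ ;
        }
    }
    printf ("intersection size %lld\n", (long long) matches) ;
    return (0) ;
}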
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isle_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int16.c new file mode 100644 index 0000000000..2b6d8e8293 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isle_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isle_int16 +// A'*B function: GB_AdotB__times_isle_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
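When ainz == nrows or bjnz == nrows, the dot product below skips the merge entirely: a dense column stores all nrows entries in order, so A(k,i) can be read directly as Ax [pa + k]. A small sketch of the dense-A, sparse-B case with toy arrays (hypothetical values):

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    int16_t Ax [4] = { 3, 1, 7, 2 } ;        // dense A(:,i): rows 0..3 all present
    int64_t Bi [2] = { 1, 3 } ;              // sparse B(:,j): rows 1 and 3
    int16_t Bx [2] = { 5, 2 } ;
    int16_t cij = 1 ;                        // TIMES identity
    for (int pb = 0 ; pb < 2 ; pb++)
    {
        int64_t k = Bi [pb] ;                         // row index of B(k,j)
        int16_t t = (int16_t) (Ax [k] <= Bx [pb]) ;   // ISLE multiply, direct lookup
        cij = (int16_t) (cij * t) ;                   // TIMES add
    }
    printf ("cij = %d\n", (int) cij) ;       // 1 only if A(k,i) <= B(k,j) for all k in B(:,j)
    return (0) ;
}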
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isle_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int32.c new file mode 100644 index 0000000000..c21719e119 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isle_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isle_int32 +// A'*B function: GB_AdotB__times_isle_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
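As a standalone illustration of the times_isle semiring used throughout this file (Multiply: t = (aik <= bkj), Add: cij *= t, Identity: 1), the operators reduce to the following scalar C. This is a minimal sketch, not part of the generated source; the helper names isle_int32, times_int32, and dot_times_isle_int32 are illustrative only.

    #include <stdint.h>

    // Multiply: the ISLE operator returns 0 or 1
    static inline int32_t isle_int32 (int32_t aik, int32_t bkj)
    {
        return (aik <= bkj) ;
    }

    // Add: the TIMES monoid combines terms by multiplication
    static inline int32_t times_int32 (int32_t cij, int32_t t)
    {
        return (cij * t) ;
    }

    // a dense dot product under this semiring starts at the monoid identity 1,
    // since cij *= 1 leaves cij unchanged
    static int32_t dot_times_isle_int32 (const int32_t *a, const int32_t *b,
        int64_t n)
    {
        int32_t cij = 1 ;
        for (int64_t k = 0 ; k < n ; k++)
        {
            cij = times_int32 (cij, isle_int32 (a [k], b [k])) ;
        }
        return (cij) ;
    }

With this semiring the result is 1 exactly when a [k] <= b [k] holds for every k considered, and 0 otherwise, which is why the generated kernels below can safely seed their accumulators with 1.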
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isle_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int64.c new file mode 100644 index 0000000000..19036ddcb1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isle_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isle_int64 +// A'*B function: GB_AdotB__times_isle_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
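The masked outer-product branch below drives its work with a small per-row state machine stored in the Flag workspace: 0 means Mask(i,j) is absent or false, a positive value means the mask entry is present but no product has reached row i yet, and -1 means w [i] already holds a partial result. A minimal sketch of that convention follows; accumulate_masked_int64 is a hypothetical helper written only to isolate the pattern, and it assumes the mask for column j has already been scattered into Flag.

    #include <stdint.h>

    // fold one product t = (A(i,k) <= B(k,j)) into w [i], respecting the mask
    static void accumulate_masked_int64 (int8_t *Flag, int64_t *w,
        int64_t i, int64_t t)
    {
        int8_t flag = Flag [i] ;
        if (flag == 0) return ;     // Mask(i,j) not present: skip this row
        if (flag > 0)
        {
            Flag [i] = -1 ;         // first contribution to C(i,j)
            w [i] = t ;
        }
        else
        {
            w [i] *= t ;            // later contribution: apply the TIMES monoid
        }
    }

After all of B(:,j) has been processed, mask entries whose Flag never reached -1 correspond to positions with no product at all; in the WITH_ZOMBIES variant they are kept as zombies via FLIP, and otherwise they are simply not appended to C.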
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isle_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int8.c new file mode 100644 index 0000000000..8fddc3ab94 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isle_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isle_int8 +// A'*B function: GB_AdotB__times_isle_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
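The dot-product kernels (GB_AdotB__*) below intersect the sorted row patterns of A(:,i) and B(:,j). When the two vectors have comparable sparsity they use a symmetric two-pointer merge; the 32x-denser cases in the generated code replace one of the linear scans with GB_BINARY_TRIM_SEARCH, and the all-dense cases skip the merge entirely. A minimal sketch of the balanced merge, under the same semiring, is given here; dot_merge_int8 is an illustrative name, not a library routine.

    #include <stdint.h>
    #include <stdbool.h>

    // merge the sorted index lists Ai [pa..pa_end-1] and Bi [pb..pb_end-1],
    // applying the multiply only where a row index appears in both vectors
    static bool dot_merge_int8 (const int64_t *Ai, const int8_t *Ax,
        int64_t pa, int64_t pa_end,
        const int64_t *Bi, const int8_t *Bx,
        int64_t pb, int64_t pb_end, int8_t *result)
    {
        bool cij_exists = false ;
        int8_t cij = 1 ;                    // identity of the TIMES monoid
        while (pa < pa_end && pb < pb_end)
        {
            int64_t ia = Ai [pa] ;
            int64_t ib = Bi [pb] ;
            if (ia < ib)
            {
                pa++ ;                      // A(ia,i) has no matching B entry
            }
            else if (ib < ia)
            {
                pb++ ;                      // B(ib,j) has no matching A entry
            }
            else
            {
                // ia == ib == k: both A(k,i) and B(k,j) are present
                cij_exists = true ;
                cij *= (int8_t) (Ax [pa] <= Bx [pb]) ;
                pa++ ;
                pb++ ;
            }
        }
        if (cij_exists) { *result = cij ; }
        return (cij_exists) ;
    }

The entry C(i,j) is created only if the two patterns intersect at least once, which matches the cij_exists handling in the MERGE macro.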
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint16.c new file mode 100644 index 0000000000..bc3a1f979b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isle_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isle_uint16 +// A'*B function: GB_AdotB__times_isle_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik <= bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
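When no Mask is present, the pattern of C has already been computed by GB_AxB_symbolic, so the numeric phase below only fills values: the workspace w is set to the monoid identity over the pattern of C(:,j), products are folded in column by column of B, and the finished column is gathered back. A minimal per-column sketch of that flow is shown here; axb_column_uint16 is an illustrative stand-alone function, with the arrays assumed to be valid CSC-style pointer/index/value arrays of matching sizes.

    #include <stdint.h>

    // numeric phase for one column j of C = A*B, pattern of C already known
    static void axb_column_uint16 (const int64_t *Ap, const int64_t *Ai,
        const uint16_t *Ax, const int64_t *Bp, const int64_t *Bi,
        const uint16_t *Bx, const int64_t *Cp, const int64_t *Ci,
        uint16_t *Cx, uint16_t *w, int64_t j)
    {
        // clear w to the identity 1, but only where C(:,j) has entries
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++)
        {
            w [Ci [p]] = 1 ;
        }
        // fold in A(:,k) "times" B(k,j) for each entry B(k,j)
        for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
        {
            int64_t k = Bi [p] ;
            uint16_t bkj = Bx [p] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                w [Ai [pa]] *= (uint16_t) (Ax [pa] <= bkj) ;
            }
        }
        // gather the finished column back into Cx
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++)
        {
            Cx [p] = w [Ci [p]] ;
        }
    }

Because the symbolic pattern of C(:,j) is the union of the patterns of A(:,k) over all entries B(k,j), every position of w written in the inner loop was already reset in the first loop, so no full clear of the workspace is needed between columns.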
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint32.c
new file mode 100644
index 0000000000..7700756ce7
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_isle_uint32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_isle_uint32
+// A'*B function: GB_AdotB__times_isle_uint32
+// Z type : uint32_t (the type of C)
+// XY type: uint32_t (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_isle_uint32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint64.c
new file mode 100644
index 0000000000..3bc8809e0f
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_isle_uint64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_isle_uint64
+// A'*B function: GB_AdotB__times_isle_uint64
+// Z type : uint64_t (the type of C)
+// XY type: uint64_t (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_isle_uint64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+    Cp [n] = cnz ;
+}
+
+#undef MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint8.c
new file mode 100644
index 0000000000..946692a679
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__times_isle_uint8.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_isle_uint8: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_isle_uint8
+// A'*B function: GB_AdotB__times_isle_uint8
+// Z type : uint8_t (the type of C)
+// XY type: uint8_t (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (aik <= bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_isle_uint8
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik <= bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isle_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki <= bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki <= bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE
+
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_islt_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_fp32.c
new file mode 100644
index 0000000000..c98b8e23f1
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_fp32.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_islt_fp32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_islt_fp32
+// A'*B function: GB_AdotB__times_islt_fp32
+// Z type : float (the type of C)
+// XY type: float (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (aik < bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_islt_fp32
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if 
it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_islt_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_fp64.c
new file mode 100644
index 0000000000..10de84a482
--- /dev/null
+++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_fp64.c
@@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_islt_fp64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_islt_fp64
+// A'*B function: GB_AdotB__times_islt_fp64
+// Z type : double (the type of C)
+// XY type: double (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (aik < bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_islt_fp64
+(
+    GrB_Matrix C,
+    const GrB_Matrix Mask,
+    const GrB_Matrix A,
+    const GrB_Matrix B,
+    bool flip // if true, A and B have been swapped
+)
+{
+
+    //--------------------------------------------------------------------------
+    // get A, B, and C
+    //--------------------------------------------------------------------------
+
+    // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
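The generated kernels above all instantiate the same TIMES_ISLT semiring: the "multiply" is the comparison t = (aik < bkj), the "add" is the TIMES monoid cij *= t, and the identity is 1, as stated in each file's header comment. The following is a minimal standalone sketch of one such dot product, not part of the patch, assuming two ordinary dense double vectors; the function name times_islt_dot is illustrative only and is not a GraphBLAS symbol.

#include <stdint.h>

/* illustrative sketch: mirrors the "both dense" branch of the
   GB_AdotB__times_islt_fp64 kernel above */
static double times_islt_dot (const double *a, const double *b, int64_t n)
{
    double cij = 1 ;                    /* identity of the TIMES monoid */
    for (int64_t k = 0 ; k < n ; k++)
    {
        double t = (a [k] < b [k]) ;    /* ISLT multiply: t is 1.0 or 0.0 */
        cij *= t ;                      /* TIMES monoid used as the "add" */
    }
    return (cij) ;                      /* 1.0 iff a[k] < b[k] for every k */
}

Because every factor t is either 0 or 1 and the product starts at the identity 1, this dense case yields 1 exactly when a[k] < b[k] holds for every k, and 0 otherwise; the sparse branches in the kernels apply the same update only at the entries selected by merging the two column patterns.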
a/GraphBLAS/Source/Generated/GB_AxB__times_islt_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int16.c new file mode 100644 index 0000000000..9ea96911f6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_int16 +// A'*B function: GB_AdotB__times_islt_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_islt_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int32.c new file mode 100644 index 0000000000..93c621d9c6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_int32 +// A'*B function: GB_AdotB__times_islt_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_islt_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int64.c new file mode 100644 index 0000000000..1f49964f06 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_int64 +// A'*B function: GB_AdotB__times_islt_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_islt_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int8.c new file mode 100644 index 0000000000..c07fd2a8a2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_int8 +// A'*B function: GB_AdotB__times_islt_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
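The sparse-times-sparse branches above choose a merge strategy from the relative number of entries in A(:,i) and B(:,j): a plain two-pointer merge when the counts are comparable, and a skip-ahead search when one column has more than 32 times the entries of the other. Below is a small sketch of that control flow on two sorted index lists, not part of the patch; binary_advance is a hypothetical helper standing in for the library's internal GB_BINARY_TRIM_SEARCH macro, and count_matches is an illustrative name.

#include <stdbool.h>
#include <stdint.h>

/* smallest p in [lo,hi) with list [p] >= target, or hi if none */
static int64_t binary_advance (int64_t target, const int64_t *list,
    int64_t lo, int64_t hi)
{
    while (lo < hi)
    {
        int64_t mid = lo + (hi - lo) / 2 ;
        if (list [mid] < target) lo = mid + 1 ; else hi = mid ;
    }
    return (lo) ;
}

/* count indices present in both sorted lists Ai [0..anz-1] and Bi [0..bnz-1] */
static int64_t count_matches (const int64_t *Ai, int64_t anz,
    const int64_t *Bi, int64_t bnz)
{
    int64_t pa = 0, pb = 0, matches = 0 ;
    bool skip_in_a = (anz > 32 * bnz) ;     /* same 32x heuristic as above */
    bool skip_in_b = (bnz > 32 * anz) ;
    while (pa < anz && pb < bnz)
    {
        int64_t ia = Ai [pa], ib = Bi [pb] ;
        if (ia < ib)
        {
            /* Ai is behind: step once, or jump ahead if Ai is much denser */
            pa = skip_in_a ? binary_advance (ib, Ai, pa + 1, anz) : pa + 1 ;
        }
        else if (ib < ia)
        {
            /* Bi is behind: step once, or jump ahead if Bi is much denser */
            pb = skip_in_b ? binary_advance (ia, Bi, pb + 1, bnz) : pb + 1 ;
        }
        else
        {
            matches++ ; pa++ ; pb++ ;       /* ia == ib: entries to "merge" */
        }
    }
    return (matches) ;
}

The 32x cutoff trades the logarithmic cost of each search against the constant cost of stepping one entry at a time; for mildly unbalanced lists the plain merge is cheaper, which is a plausible reason the generated code keeps both paths.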
a/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint16.c new file mode 100644 index 0000000000..da02de046b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_uint16 +// A'*B function: GB_AdotB__times_islt_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint32.c new file mode 100644 index 0000000000..466bf50999 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_uint32 +// A'*B function: GB_AdotB__times_islt_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
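+    // Sketch of the masked path below: C takes the pattern of the Mask.
+    // Under WITH_ZOMBIES the Mask pattern is copied into C up front, and
+    // any Mask entry that receives no value from A*B is kept as a zombie
+    // (row index stored as FLIP (i), C->nzombies incremented) to be pruned
+    // later; otherwise only live entries are appended and Cp [j] records
+    // the running count.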
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint64.c new file mode 100644 index 0000000000..938f5b9058 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_uint64 +// A'*B function: GB_AdotB__times_islt_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
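+    // Sketch of the outer-product (Gustavson-style) scheme used below: for
+    // each column j, every entry B(k,j) selects a column A(:,k) whose
+    // entries are folded into the dense workspace w; the Flag array marks
+    // which rows of Mask(:,j) are present and flips negative once C(i,j)
+    // has been started, and w is then gathered back into C(:,j).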
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint8.c new file mode 100644 index 0000000000..4b490c88c2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_islt_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_islt_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_islt_uint8 +// A'*B function: GB_AdotB__times_islt_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik < bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
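+    // Note on the unmasked path below: "clearing" w sets each entry in the
+    // pattern of C(:,j) to the monoid identity (1 here), so the subsequent
+    // w [i] *= t updates fold every term into a running product.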
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik < bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_islt_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki < bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki < bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isne_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_fp32.c new file mode 100644 index 0000000000..a1b29027bb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_fp32 +// A'*B function: GB_AdotB__times_isne_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
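+    // In effect, this semiring computes an "all not equal" indicator: ISNE
+    // yields t = 0 or 1 (as float), and the TIMES monoid leaves an entry at
+    // 1 only if every compared pair differed.  Note that under IEEE rules a
+    // NaN operand makes (aik != bkj) evaluate true, so such terms give t = 1.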
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + 
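+            // The branches below choose a dot-product kernel: two dense
+            // vectors use a flat loop over all nrows entries, a dense/sparse
+            // mix indexes the dense side directly, and when one vector has
+            // more than 32 times the entries of the other the merge advances
+            // with GB_BINARY_TRIM_SEARCH instead of stepping one entry at a
+            // time; otherwise a plain two-pointer merge is used.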
// B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isne_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_fp64.c new file mode 100644 index 0000000000..ca1c1878ef --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_fp64 +// A'*B function: GB_AdotB__times_isne_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isne_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int16.c new file mode 100644 index 0000000000..dcb10603f4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_int16 +// A'*B function: GB_AdotB__times_isne_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isne_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int32.c new file mode 100644 index 0000000000..0371bb46d9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_int32 +// A'*B function: GB_AdotB__times_isne_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isne_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int64.c new file mode 100644 index 0000000000..f5ac126b20 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_int64 +// A'*B function: GB_AdotB__times_isne_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isne_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int8.c new file mode 100644 index 0000000000..9825ca9b9e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_int8 +// A'*B function: GB_AdotB__times_isne_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint16.c new file mode 100644 index 0000000000..af6147c9db --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_uint16 +// A'*B function: GB_AdotB__times_isne_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
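+ // Editorial note (illustrative only, not part of the generated kernel):
+ // under this TIMES_ISNE semiring the multiply op yields t = (aik != bkj),
+ // which is 0 or 1 in uint16_t, and the TIMES monoid accumulates cij *= t
+ // starting from the identity 1.  For example, accumulating the entry pairs
+ // (2,3), (5,5), and (7,1) gives cij = 1 * (2!=3) * (5!=5) * (7!=1)
+ // = 1 * 1 * 0 * 1 = 0, so cij remains 1 only if every matched pair of
+ // entries differs.  A scalar model of one accumulation step (hypothetical
+ // helper, not in the library) would be:
+ //     uint16_t step (uint16_t cij, uint16_t aik, uint16_t bkj)
+ //     { return cij * (uint16_t) (aik != bkj) ; }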
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint32.c new file mode 100644 index 0000000000..571978d930 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_uint32 +// A'*B function: GB_AdotB__times_isne_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
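+ // Editorial sketch of the masked path below (descriptive comments only):
+ // when Mask is present, Mask(:,j) is scattered into the Flag workspace, and
+ // each A(i,k)*B(k,j) term updates w [i] according to Flag [i]:
+ //     Flag [i] == 0 : the entry is not in Mask(:,j), so it is skipped ;
+ //     Flag [i] >  0 : first contribution, w [i] = t and Flag [i] = -1 ;
+ //     Flag [i] <  0 : later contribution, w [i] *= t ;
+ // The gather then walks Mask(:,j): with WITH_ZOMBIES defined, mask entries
+ // that received no contribution are kept as zombies via FLIP (i), so the
+ // pattern of C matches the Mask and the zombies are pruned later.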
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint64.c new file mode 100644 index 0000000000..e08d09a4c2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_uint64 +// A'*B function: GB_AdotB__times_isne_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
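+ // Editorial sketch of the unmasked path below (descriptive comments only):
+ // when Mask is NULL the pattern of C has already been computed by
+ // GB_AxB_symbolic, so for each column j the workspace w is reset to the
+ // additive identity 1 only at the positions in C(:,j) (for instance, if
+ // C(:,j) has pattern {0,4}, only w [0] and w [4] are set to 1), a
+ // Gustavson-style pass over B(:,j) and A(:,k) folds each term into w via
+ // w [i] *= (aik != bkj), and the result is gathered back into Cx.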
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint8.c new file mode 100644 index 0000000000..a097750e8a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_isne_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_isne_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_isne_uint8 +// A'*B function: GB_AdotB__times_isne_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik != bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
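+ // Editorial sketch of the companion dot-product kernel
+ // GB_AdotB__times_isne_uint8 later in this file (descriptive comments only):
+ // each cij = A(:,i)'*B(:,j) is computed by one of six cases chosen from
+ // ainz and bjnz: both vectors dense, A(:,i) dense, B(:,j) dense,
+ // ainz > 32*bjnz (advance through A(:,i) with GB_BINARY_TRIM_SEARCH),
+ // bjnz > 32*ainz (the symmetric case), or otherwise a linear merge of the
+ // two index lists via the MERGE macro.  The factor 32 is simply the
+ // heuristic threshold used in the generated code.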
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik != bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_isne_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki != bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki != bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_fp32.c new file mode 100644 index 0000000000..2d93de272c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_fp32 +// A'*B function: GB_AdotB__times_land_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
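+ // Editorial note (illustrative only, not part of the generated kernel):
+ // this variant uses the LAND multiply op, t = ((aik != 0) && (bkj != 0)),
+ // which is 0 or 1 as a float, with the same TIMES monoid and identity 1.
+ // Accumulating the entry pairs (2.5, 3.0) and (0.0, 7.0), for example,
+ // gives cij = 1 * 1 * 0 = 0, so cij stays nonzero only while every matched
+ // pair has both entries nonzero.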
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_fp64.c new file mode 100644 index 0000000000..5186d745cb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_fp64 +// A'*B function: GB_AdotB__times_land_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_int16.c new file mode 100644 index 0000000000..bb1aca7dc1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_int16 +// A'*B function: GB_AdotB__times_land_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
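+ // note: with WITH_ZOMBIES enabled, C keeps the Mask's pattern; any Mask entry with no corresponding entry in A*B was stored above as a zombie (row index FLIP (i), counted in C->nzombies), so C is queued below and its zombies can be pruned later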
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_int32.c new file mode 100644 index 0000000000..fd9aba3f2a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_int32 +// A'*B function: GB_AdotB__times_land_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_int64.c new file mode 100644 index 0000000000..6a9be918ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_int64 +// A'*B function: GB_AdotB__times_land_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_int8.c new file mode 100644 index 0000000000..794c2f6b9e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_int8 +// A'*B function: GB_AdotB__times_land_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
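For reference, the TIMES_LAND semiring used by this kernel reduces the boolean products (A(k,i) != 0) && (B(k,j) != 0) over the row indices k with ordinary multiplication, starting from the identity 1. The following is a minimal standalone sketch of that scalar reduction, not part of the generated kernel: the function name times_land_dot and the dense 0/1 input arrays are illustrative assumptions, but the loop matches the dense-dense case of the GB_AdotB__times_land_int8 dot-product routine later in this file.

#include <stdint.h>

// Illustrative sketch: the TIMES_LAND reduction for one C(i,j), assuming the
// k-th entries of column i of A and column j of B are given as dense arrays
// a [0..n-1] and b [0..n-1].  cij starts at the TIMES identity (1) and stays
// 1 only if a [k] and b [k] are both nonzero for every k.
static int8_t times_land_dot (const int8_t *a, const int8_t *b, int64_t n)
{
    int8_t cij = 1 ;                                // identity of TIMES
    for (int64_t k = 0 ; k < n ; k++)
    {
        int8_t t = (a [k] != 0) && (b [k] != 0) ;   // LAND "multiply"
        cij *= t ;                                  // TIMES "add"
    }
    return (cij) ;
}
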
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint16.c new file mode 100644 index 0000000000..d1cd94d1ef --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_uint16 +// A'*B function: GB_AdotB__times_land_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
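The GB_AdotB kernels in these generated files rely on GB_BINARY_TRIM_SEARCH, whose definition is not part of this generated file, to skip ahead in the denser of the two index lists whenever one column has at least 32 times as many entries as the other. The sketch below is only a guess at the spirit of that macro, written as a plain function with a hypothetical name trim_search: a lower-bound binary search over a sorted index array. The 32x threshold reflects the usual trade-off between an O(ainz + bjnz) linear merge and roughly O(bjnz log ainz) repeated searches when the sizes are very unbalanced.

#include <stdint.h>

// Illustrative only: given a sorted index array Ai [pleft..pright-1], return
// the smallest position whose index is >= target, so the caller can discard
// everything before it.  The real GB_BINARY_TRIM_SEARCH macro may differ in
// detail.
static int64_t trim_search (int64_t target, const int64_t *Ai,
    int64_t pleft, int64_t pright)
{
    while (pleft < pright)
    {
        int64_t pmid = pleft + (pright - pleft) / 2 ;
        if (Ai [pmid] < target)
        {
            pleft = pmid + 1 ;      // Ai [pleft..pmid] are all < target
        }
        else
        {
            pright = pmid ;         // Ai [pmid] is still a candidate
        }
    }
    return (pleft) ;
}
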
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint32.c new file mode 100644 index 0000000000..4e9f566ba8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_uint32 +// A'*B function: GB_AdotB__times_land_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
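The masked branch of these outer-product kernels uses a small state machine in the Flag workspace: Flag [i] == 0 means row i is not permitted by Mask(:,j), a positive value means it is permitted but C(i,j) has no value yet, and -1 means w [i] already holds the running value for C(i,j). The sketch below isolates that scatter/accumulate pattern on one dense column; the function name masked_accumulate, the flat rows/vals arrays, and the uint32_t values are illustrative assumptions, not the library's helpers (in the kernels this work is done by scatter_mask and the loops that follow).

#include <stdint.h>

// Illustrative sketch of the Flag-based masked accumulation, for one column.
// Flag [i] == 0 : row i is masked out
// Flag [i] >  0 : row i is permitted, but no value accumulated yet
// Flag [i] == -1: row i is permitted, and w [i] holds the running value
static void masked_accumulate (int8_t *Flag, uint32_t *w,
    const int64_t *rows, const uint32_t *vals, int64_t nvals)
{
    for (int64_t p = 0 ; p < nvals ; p++)
    {
        int64_t i = rows [p] ;
        if (Flag [i] == 0) continue ;       // masked out: skip the work
        if (Flag [i] > 0)
        {
            Flag [i] = -1 ;                 // first contribution to C(i,j)
            w [i] = vals [p] ;
        }
        else
        {
            w [i] *= vals [p] ;             // later contribution (TIMES monoid)
        }
    }
}
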
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint64.c new file mode 100644 index 0000000000..74885ba9ed --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_uint64 +// A'*B function: GB_AdotB__times_land_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
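In the WITH_ZOMBIES variant of the masked outer product, an entry that is present in the Mask but absent from A*B is kept in C's pattern as a "zombie": its row index is stored in a flipped (negative) encoding via FLIP, C->nzombies is incremented, and the matrix is queued with GB_queue_insert so the zombies can be pruned by a later cleanup pass elsewhere in the library. FLIP itself is defined in GB.h, not in this file; the sketch below uses a hypothetical one's-complement encoding purely to illustrate the requirements on such a marker: it must map valid indices (>= 0) to negative values and must be its own inverse.

#include <stdint.h>

// Hypothetical flip encoding, for illustration only (the library's FLIP may
// be defined differently in GB.h).  One's complement maps a nonnegative row
// index to a strictly negative value and satisfies flip (flip (i)) == i,
// which is all the zombie mechanism needs: a reversible "this entry is dead"
// marker stored in place of the row index.
static inline int64_t flip      (int64_t i) { return (~i) ; }
static inline int64_t unflip    (int64_t i) { return ((i < 0) ? ~i : i) ; }
static inline int     is_zombie (int64_t i) { return (i < 0) ; }
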
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_land_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint8.c new file mode 100644 index 0000000000..835cca56ca --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_land_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_land_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_land_uint8 +// A'*B function: GB_AdotB__times_land_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) && (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
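From the user's side, these hard-coded kernels are reached through the generic GrB_mxm call when a matching built-in semiring is supplied and GBCOMPACT is not defined. A hedged usage sketch follows, assuming uint8 matrices and the predefined GxB_TIMES_LAND_UINT8 semiring; error checking is omitted for brevity, and in real code each returned GrB_Info should be tested.

#include "GraphBLAS.h"

// Sketch: C<M> = A*B over the TIMES_LAND_UINT8 semiring, so C(i,j) is
// computed only where M(i,j) is true.
GrB_Info times_land_mxm (GrB_Matrix C, GrB_Matrix M, GrB_Matrix A, GrB_Matrix B)
{
    return (GrB_mxm (C, M, NULL, GxB_TIMES_LAND_UINT8, A, B, NULL)) ;
}
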
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) && (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_land_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) && (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) && (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_fp32.c new file mode 100644 index 0000000000..290e0d4b90 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_fp32 +// A'*B function: GB_AdotB__times_lor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
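+    // A minimal sketch (not part of the generated kernel) of the scalar
+    // update pair this TIMES_LOR_FP32 semiring applies, assuming two float
+    // operands aik and bkj and an accumulator cij initialized to the
+    // identity 1:
+    //
+    //      float t = (aik != 0) || (bkj != 0) ;   // "multiply": logical OR
+    //      cij *= t ;                             // "add": float times
+    //
+    // For example, aik = 2.5 and bkj = 0 give t = 1 and leave cij unchanged,
+    // while aik = 0 and bkj = 0 give t = 0 and force cij to 0.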
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_fp64.c new file mode 100644 index 0000000000..711e9532f1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_fp64 +// A'*B function: GB_AdotB__times_lor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
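+    // How the workspace w is used in the unmasked case below (a sketch of the
+    // code that follows, not part of it): the pattern of C has already been
+    // computed by GB_AxB_symbolic, so for each column j the kernel first sets
+    // w [i] to the semiring identity (1) for every i in the pattern of
+    // C(:,j), then for each entry B(k,j) it folds A(:,k) into w with the
+    // multiply/add pair, and finally gathers w back into Cx over that same
+    // pattern.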
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int16.c new file mode 100644 index 0000000000..aa0638e36d --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_int16 +// A'*B function: GB_AdotB__times_lor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
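+    // Reading of the Flag workspace used in the masked case below (added
+    // commentary, not generated code): scatter_mask marks Flag [i] as +1 for
+    // each i where Mask(i,j) is true; +1 means the entry is in the Mask but
+    // C(i,j) has not yet received a contribution, and it is set to -1 the
+    // first time one is made; Flag [i] == 0 means Mask(i,j) is not present
+    // and the entry is skipped.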
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int32.c new file mode 100644 index 0000000000..d18e0eac38 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_int32 +// A'*B function: GB_AdotB__times_lor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
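+    // Note on the companion dot-product kernel GB_AdotB__times_lor_int32
+    // further below (a reading of that code): for each C(i,j) it picks a
+    // strategy from the relative sparsity of A(:,i) and B(:,j): if both are
+    // dense it runs a straight loop over all rows; if only one is dense it
+    // iterates over the sparse one and indexes the dense one directly; if one
+    // pattern is more than 32 times sparser than the other it advances
+    // through the denser pattern with GB_BINARY_TRIM_SEARCH; otherwise it
+    // does an ordinary two-pointer merge of the two patterns.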
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
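+        // Added note on the two compile-time variants above: with
+        // WITH_ZOMBIES defined, C->p is copied from Maskp up front and every
+        // Mask entry gets a slot in C (misses become zombies); without it,
+        // the kernel builds Cp and Ci compactly as it goes, using the cnz
+        // counter, so only live entries are stored.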
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int64.c new file mode 100644 index 0000000000..720f64d5b3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_int64 +// A'*B function: GB_AdotB__times_lor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
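+    // Added note: the flip argument does not appear to be referenced in the
+    // body of this kernel; since the LOR multiply (aik != 0) || (bkj != 0) is
+    // symmetric in its two operands, swapping A and B does not change t.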
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int8.c new file mode 100644 index 0000000000..ce08f6bba6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_int8 +// A'*B function: GB_AdotB__times_lor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
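+    // Editorial note (illustrative, not part of the generated template): with
+    // this TIMES_LOR semiring the "multiply" tests both operands against zero
+    // and ORs the results, and the "add" is an ordinary product, so
+    //
+    //      C(i,j) = prod over k of ((A(i,k) != 0) || (B(k,j) != 0)),
+    //
+    // taken over the k for which both A(i,k) and B(k,j) are explicit entries.
+    // For instance, if the shared entries are A(i,0)=0, B(0,j)=3 and A(i,2)=5,
+    // B(2,j)=0, then C(i,j) = (0||1) * (1||0) = 1.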
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint16.c new file mode 100644 index 0000000000..ed8f974249 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_uint16 +// A'*B function: GB_AdotB__times_lor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
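+    // Editorial note (illustrative sketch, not part of the generated template):
+    // this is a Gustavson-style, column-at-a-time saxpy method.  For each
+    // column j it walks the entries B(k,j), folds the entries of A(:,k) into
+    // the dense workspace w with the semiring ops listed above, and then
+    // gathers w back into C(:,j).  When a Mask is given, Flag marks which rows
+    // of column j are allowed: rows outside the Mask are never written, and in
+    // the WITH_ZOMBIES variant a Mask position that receives no contribution
+    // from A*B is kept as a zombie (its row index stored as FLIP (i)), which
+    // is what lets C->p be copied directly from Maskp.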
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint32.c new file mode 100644 index 0000000000..37c511d06f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_uint32 +// A'*B function: GB_AdotB__times_lor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
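+    // Editorial note (illustrative, not part of the generated template): the
+    // companion dot-product kernel GB_AdotB__times_lor_uint32 later in this
+    // file computes each C(i,j) as A(:,i)'*B(:,j) and picks a strategy per
+    // pair of columns: if both are dense it loops over all nrows entries; if
+    // exactly one is dense it iterates over the sparse one and indexes the
+    // dense one directly; if one column has more than 32 times the entries of
+    // the other it advances through the denser column with
+    // GB_BINARY_TRIM_SEARCH; otherwise it does a two-pointer merge of the two
+    // sorted index lists via the MERGE macro.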
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint64.c new file mode 100644 index 0000000000..163d3f2b6f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_uint64 +// A'*B function: GB_AdotB__times_lor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
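+    // Editorial note (illustrative, not part of the generated template): in
+    // the dot-product kernel below, the MERGE macro consumes one matching
+    // index k from A(:,i) and B(:,j), forms t = (aki != 0) || (bkj != 0), and
+    // either starts the result (first match: cij_exists = true, cij = t) or
+    // folds it in with cij *= t.  C(i,j) is appended to C only when at least
+    // one matching index was found, so the pattern of C is the set of (i,j)
+    // whose column index lists intersect, further restricted by the Mask
+    // (handled inside jinit and cij_init) when a Mask is present.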
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint8.c new file mode 100644 index 0000000000..fdf2713ff4 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lor_uint8 +// A'*B function: GB_AdotB__times_lor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) || (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
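+    // A minimal scalar sketch of this semiring (illustrative only; the
+    // names a, b, and nrows below are hypothetical, not part of the
+    // generated kernel): MULT is the logical OR of the two nonzero tests
+    // and ADD is ordinary multiplication starting from the identity 1, so
+    // accumulating one entry over a pair of dense columns a [0..nrows-1]
+    // and b [0..nrows-1] would reduce as
+    //
+    //      uint8_t cij = 1 ;                               // TIMES identity
+    //      for (int64_t k = 0 ; k < nrows ; k++)
+    //      {
+    //          uint8_t t = (a [k] != 0) || (b [k] != 0) ;  // LOR multiply
+    //          cij *= t ;                                  // TIMES "add"
+    //      }
+    //
+    // which leaves cij == 1 only if every k has a [k] or b [k] nonzero.
+    // The kernels below carry out the same reduction on the sparse columns
+    // of A and B.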
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) || (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) || (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) || (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp32.c new file mode 100644 index 0000000000..82179245d2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_fp32 +// A'*B function: GB_AdotB__times_lxor_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
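+    // Note on the multiply operator: this kernel differs from the TIMES_LOR
+    // variants only in that t = ((aik != 0) != (bkj != 0)) is the logical
+    // XOR of the two nonzero tests.  For a single k (x and y denoting any
+    // nonzero values):
+    //
+    //      aik   bkj    t
+    //       0     0     0
+    //       0     y     1
+    //       x     0     1
+    //       x     y     0
+    //
+    // Since ADD is ordinary multiplication with identity 1, a single k with
+    // t == 0 is enough to force the accumulated w [i] or cij to zero.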
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column 
+ Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp64.c new file mode 100644 index 0000000000..c45e3d7905 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_fp64 +// A'*B function: GB_AdotB__times_lxor_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
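+    // The masked phase below keeps a small per-row state in the Flag
+    // workspace (one int8_t per row, already allocated and cleared).  A
+    // rough reading of the code, not an additional specification:
+    //
+    //      Flag [i] ==  0 : Mask(i,j) not present or false; row i is skipped
+    //      Flag [i]  >  0 : Mask(i,j) is true, C(i,j) not yet computed
+    //      Flag [i] == -1 : Mask(i,j) is true and w [i] holds a partial
+    //                       value for C(i,j)
+    //
+    // scatter_mask presumably sets the positive entries for column j, and
+    // the gather step resets every touched Flag [i] back to zero so the
+    // workspace is clean for the next column.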
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int16.c new file mode 100644 index 0000000000..3cbdb9e726 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_int16 +// A'*B function: GB_AdotB__times_lxor_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
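+    // The dot-product kernel GB_AdotB__times_lxor_int16 further below picks
+    // one of several strategies for each C(i,j) = A(:,i)'*B(:,j), based on
+    // the entry counts ainz and bjnz of the two columns (a summary of the
+    // code, not an extra guarantee):
+    //
+    //      both columns dense      : loop over all nrows rows directly
+    //      one column dense        : loop over the sparse column and index
+    //                                into the dense one
+    //      ainz > 32*bjnz (or the  : step through the denser column with
+    //      reverse)                  GB_BINARY_TRIM_SEARCH instead of one
+    //                                entry at a time
+    //      comparable sparsity     : ordinary two-pointer merge via MERGE
+    //
+    // The factor of 32 appears to be a fixed heuristic in the template from
+    // which this file is generated.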
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int32.c new file mode 100644 index 0000000000..d0483bbd79 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_int32 +// A'*B function: GB_AdotB__times_lxor_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
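+    // When a Mask is present and WITH_ZOMBIES is enabled, C takes the
+    // pattern of the Mask: entries of the Mask that A*B does not produce
+    // are kept as zombies, with their row index stored as FLIP (i) and
+    // C->nzombies incremented, and the result is handed to GB_queue_insert,
+    // presumably so that later deferred work can prune the zombies.  In the
+    // non-zombie build the gather step simply skips those entries and logs
+    // compacted column pointers in Cp instead.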
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int64.c new file mode 100644 index 0000000000..7170aae2d1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_int64 +// A'*B function: GB_AdotB__times_lxor_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
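The header comment above fully determines the generated kernel: the LXOR multiply yields 1 exactly when one of aik, bkj is nonzero, and the TIMES monoid folds those 0/1 products together, so its identity 1 leaves a running product unchanged and a single 0 product forces cij to 0. A minimal standalone sketch of that reduction, kept outside the patch and using hypothetical plain arrays rather than the GraphBLAS internals:

    #include <stdint.h>

    // Illustrative sketch only: fold the times_lxor semiring over nz matched
    // value pairs a[k], b[k].  The array names and nz are hypothetical.
    static int64_t times_lxor_reduce (const int64_t *a, const int64_t *b,
                                      int64_t nz)
    {
        int64_t cij = 1 ;                       // identity of the TIMES monoid
        for (int64_t k = 0 ; k < nz ; k++)
        {
            // LXOR multiply: 1 iff exactly one of a[k], b[k] is nonzero
            int64_t t = (a [k] != 0) != (b [k] != 0) ;
            cij *= t ;                          // TIMES monoid combines products
        }
        return cij ;                            // 1 only if every product was 1
    }

For example, a = {3,0,5} and b = {0,7,2} gives products 1, 1, 0, so cij = 0, matching the "cij *= t" update in the kernel below.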
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int8.c new file mode 100644 index 0000000000..125839e330 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_int8 +// A'*B function: GB_AdotB__times_lxor_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
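The GB_AdotB__* kernels in this patch all share the same two-pointer merge of A(:,i) and B(:,j); when one column has more than 32 times as many entries as the other, the denser column's pointer is advanced with GB_BINARY_TRIM_SEARCH rather than one entry at a time. A self-contained sketch of that merge, with a plain lower-bound search standing in for the internal macro and the heuristic folded into the loop as a simplification (all names here are hypothetical, not copied from GB_AxB_methods.h):

    #include <stdint.h>

    // stand-in for GB_BINARY_TRIM_SEARCH: first p in [pleft,pright) with
    // Ai [p] >= target (illustrative only)
    static int64_t trim_search (int64_t target, const int64_t *Ai,
                                int64_t pleft, int64_t pright)
    {
        while (pleft < pright)
        {
            int64_t pmid = pleft + (pright - pleft) / 2 ;
            if (Ai [pmid] < target) pleft = pmid + 1 ; else pright = pmid ;
        }
        return pleft ;
    }

    // merge the row indices of two sorted sparse columns; 'combine' marks
    // where the MERGE macro would multiply and accumulate a matched pair
    static void merge_columns (const int64_t *Ai, int64_t pa, int64_t pa_end,
                               const int64_t *Bi, int64_t pb, int64_t pb_end,
                               void (*combine) (int64_t pa, int64_t pb))
    {
        int64_t ainz = pa_end - pa, bjnz = pb_end - pb ;
        while (pa < pa_end && pb < pb_end)
        {
            int64_t ia = Ai [pa], ib = Bi [pb] ;
            if (ia < ib)
            {
                // A lags behind: skip ahead, by binary search only if A(:,i)
                // is far denser than B(:,j)
                pa = (ainz > 32 * bjnz) ?
                     trim_search (ib, Ai, pa + 1, pa_end) : pa + 1 ;
            }
            else if (ib < ia)
            {
                pb = (bjnz > 32 * ainz) ?
                     trim_search (ia, Bi, pb + 1, pb_end) : pb + 1 ;
            }
            else
            {
                combine (pa++, pb++) ;          // ia == ib: matched entry
            }
        }
    }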
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log 
the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint16.c new file mode 100644 index 0000000000..a8aadd5923 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_uint16 +// A'*B function: GB_AdotB__times_lxor_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
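In the unmasked branch of each GB_AxB__* kernel, "clear w" sets the workspace entries in C(:,j)'s pattern to 1 because 1 is the identity of this semiring's TIMES monoid; the saxpy-style pass then folds one product per entry of B(:,j) into w, and the final loop gathers w into C(:,j) along the pattern produced by GB_AxB_symbolic. A hedged, self-contained restatement of that column update over plain CSC arrays (names are placeholders, not the library's API):

    #include <stdint.h>

    // Illustrative sketch only: C(:,j) = A*B(:,j) in the times_lxor semiring,
    // given C's column pattern (Cp, Ci) from a prior symbolic phase.
    static void axb_column_times_lxor
    (
        uint16_t *w,                        // dense workspace, one slot per row
        uint16_t *Cx, const int64_t *Cp, const int64_t *Ci,
        const uint16_t *Ax, const int64_t *Ap, const int64_t *Ai,
        const uint16_t *Bx, const int64_t *Bp, const int64_t *Bi,
        int64_t j
    )
    {
        // "clear" w to the monoid identity, only in the pattern of C(:,j)
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = 1 ;
        for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
        {
            int64_t k = Bi [p] ;
            uint16_t bkj = Bx [p] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                uint16_t t = (Ax [pa] != 0) != (bkj != 0) ;   // lxor multiply
                w [Ai [pa]] *= t ;                            // times "add"
            }
        }
        // gather the finished column back into C
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ;
    }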
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint32.c new file mode 100644 index 0000000000..8e1f50d0c6 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_uint32 +// A'*B function: GB_AdotB__times_lxor_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
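When a Mask is supplied and WITH_ZOMBIES is in effect, each kernel copies Maskp into C->p and then marks every Mask entry that never received a product as a zombie: its value is set to the identity and its row index is stored through FLIP, with C->nzombies counting them so GB_queue_insert can hand C to a later cleanup pass. Below is a tiny sketch of a self-inverse flip encoding of that kind; the macro shown is an assumption about its shape, the real definition lives in GB.h:

    #include <stdint.h>
    #include <stdbool.h>

    // assumed encoding (not copied from GB.h): negative indices mark zombies,
    // and flipping twice restores the original row index
    #define FLIP(i)      (-(i) - 2)
    #define IS_ZOMBIE(i) ((i) < 0)

    static int64_t row_of (int64_t i)       // recover the row, zombie or not
    {
        return IS_ZOMBIE (i) ? FLIP (i) : i ;
    }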
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint64.c new file mode 100644 index 0000000000..50b544da0b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_uint64 +// A'*B function: GB_AdotB__times_lxor_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
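Each dot-product kernel also special-cases fully dense columns: when A(:,i) has ainz == nrows, its values can be indexed directly as Ax [pa + k], so the sparse merge collapses to a single pass over whichever column is sparse. A short sketch of the dense-A, sparse-B case under that assumption (plain arrays, hypothetical names):

    #include <stdint.h>

    // Illustrative sketch only: cij = A(:,i)' * B(:,j) in times_lxor when
    // A(:,i) is dense (its values start at Ax [pa], row k at Ax [pa + k])
    // and B(:,j) is sparse with row indices Bi [pb .. pb_end-1].
    static uint64_t dot_dense_sparse (const uint64_t *Ax, int64_t pa,
                                      const uint64_t *Bx, const int64_t *Bi,
                                      int64_t pb, int64_t pb_end)
    {
        uint64_t cij = 1 ;                              // TIMES identity
        for ( ; pb < pb_end ; pb++)
        {
            int64_t k = Bi [pb] ;
            uint64_t t = (Ax [pa + k] != 0) != (Bx [pb] != 0) ;
            cij *= t ;
        }
        return cij ;
    }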
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef 
WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, 
cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = 
cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint8.c new file mode 100644 index 0000000000..65bf7042c7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_lxor_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_lxor_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_lxor_uint8 +// A'*B function: GB_AdotB__times_lxor_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = ((aik != 0) != (bkj != 0)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
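The masked kernel below follows the same Flag discipline as the uint64 version above: scatter_mask marks Mask(:,j) in the Flag workspace, and each Flag [i] is then in one of three states while column j is built. Summarized for reference (the state values and the update are taken from the kernels themselves; this fragment is a sketch, not new logic):

    // Flag [i] ==  0 : Mask(i,j) absent or false -> C(i,j) is never computed
    // Flag [i]  >  0 : Mask(i,j) true, but no product has reached row i yet
    // Flag [i] == -1 : Mask(i,j) true and w [i] already holds a partial C(i,j)

    int8_t flag = Flag [i] ;
    if (flag == 0)     { /* masked out: skip this product */ }
    else if (flag > 0) { Flag [i] = -1 ; w [i] = t ; }     // first product for row i
    else               { w [i] *= t ; }                    // fold in a later product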
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES 
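Under WITH_ZOMBIES, C->p was copied verbatim from Maskp above, so C already carries the mask's pattern; a masked position that received no product is kept as a "zombie", with its row index stored in flipped (negative) form, and the matrix is queued so the zombies can be pruned later. A sketch of the encoding idea (the actual FLIP macro is defined in GB.h; the one below is only illustrative):

    // an illustrative self-inverse flip: valid indices i >= 0 map to values < 0
    #define FLIP_SKETCH(i) (-(i) - 2)        // FLIP_SKETCH (FLIP_SKETCH (i)) == i

    // marking one masked-but-absent position p in column j:
    //     Cx [p] = 1 ;                      // placeholder (the monoid identity here)
    //     Ci [p] = FLIP_SKETCH (i) ;        // remember i, but tag it as a zombie
    //     C->nzombies++ ;                   // pruned later, after GB_queue_insert (C)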
+ // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = (aik != 0) != (bkj != 0) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_lxor_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = (aki != 0) != (bkj != 0) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, 
ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = (aki != 0) != (bkj != 0) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = 
i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_fp32.c new file mode 100644 index 0000000000..58d794c9ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_fp32 +// A'*B function: GB_AdotB__times_max_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
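For this TIMES_MAX_FP32 semiring, the per-entry work that both the outer-product and dot-product variants repeat is a max followed by a multiply. FMAX is the library's floating-point max macro; fmaxf from <math.h> is used below as a stand-in, which is an assumption about its exact NaN handling. The function name is illustrative:

    #include <stdint.h>
    #include <math.h>                      // fmaxf, standing in for FMAX

    // one accumulation step of the TIMES_MAX_FP32 semiring, in isolation
    static void times_max_fp32_step (float *restrict w, int64_t i, float aik, float bkj)
    {
        float t = fmaxf (aik, bkj) ;       // "multiply": the larger of the two entries
        w [i] *= t ;                       // "add": fold into the TIMES monoid (identity 1)
    }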
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_max_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_fp64.c new file mode 100644 index 0000000000..ba72360b4a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_fp64 +// A'*B function: GB_AdotB__times_max_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (FMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
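In the unmasked path further below, the pattern of C has already been computed symbolically, so the numeric work per column reduces to three steps: set w to the monoid identity (1) on C(:,j)'s pattern, fold every product into w, then gather w back into Cx. A compact restatement of that per-column loop for this TIMES_MAX_FP64 semiring (variable names mirror the kernel; fmax stands in for FMAX, and this is a sketch, not a drop-in replacement):

    #include <stdint.h>
    #include <math.h>

    // numeric phase for one column j of C = A*B over TIMES_MAX_FP64, given the
    // precomputed pattern Cp/Ci and a dense workspace w of size nrows(C)
    static void times_max_fp64_column (int64_t j,
        const int64_t *Cp, const int64_t *Ci, double *Cx,
        const int64_t *Ap, const int64_t *Ai, const double *Ax,
        const int64_t *Bp, const int64_t *Bi, const double *Bx,
        double *w)
    {
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) w [Ci [p]] = 1 ;      // identity
        for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++)
        {
            int64_t k = Bi [p] ;                    // B(k,j) is present
            double bkj = Bx [p] ;
            for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++)
            {
                int64_t i = Ai [pa] ;               // A(i,k) is present
                w [i] *= fmax (Ax [pa], bkj) ;      // w(i) "+=" A(i,k) "*" B(k,j)
            }
        }
        for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) Cx [p] = w [Ci [p]] ; // gather
    }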
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
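The dot-product kernel above chooses a merge strategy per (i,j) pair: when one of A(:,i) or B(:,j) has more than 32 times the entries of the other, the sparser list is walked linearly and the denser one is advanced with GB_BINARY_TRIM_SEARCH; otherwise a plain two-pointer merge is used. A self-contained sketch of the balanced two-pointer case for this semiring (the function name is illustrative; the real kernel also handles the mask, the dense special cases, and the skewed cases):

    #include <stdint.h>
    #include <stdbool.h>
    #include <math.h>

    // cij = A(:,i)' * B(:,j) over TIMES_MAX_FP64, merging the sorted index lists
    // Ai [pa..pa_end) and Bi [pb..pb_end); returns true if any index is shared,
    // i.e. if C(i,j) appears in the pattern
    static bool dot_times_max_fp64 (
        const int64_t *Ai, const double *Ax, int64_t pa, int64_t pa_end,
        const int64_t *Bi, const double *Bx, int64_t pb, int64_t pb_end,
        double *cij_out)
    {
        bool cij_exists = false ;
        double cij = 1 ;                            // identity of the TIMES monoid
        while (pa < pa_end && pb < pb_end)
        {
            int64_t ia = Ai [pa], ib = Bi [pb] ;
            if      (ia < ib) pa++ ;                // A(ia,i) has no matching B entry
            else if (ib < ia) pb++ ;                // B(ib,j) has no matching A entry
            else                                    // ia == ib == k: a true product
            {
                cij *= fmax (Ax [pa++], Bx [pb++]) ;
                cij_exists = true ;
            }
        }
        *cij_out = cij ;
        return cij_exists ;
    }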
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_int16.c new file mode 100644 index 0000000000..09aa44965c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_int16 +// A'*B function: GB_AdotB__times_max_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
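As in the kernels above, the masked loop below prunes whole columns of A cheaply: empty() reports the first and last row index of A(:,k), and the column is skipped outright when that range cannot overlap the row range [mlo, mhi] of Mask(:,j). The test is plain interval disjointness, e.g.:

    #include <stdint.h>
    #include <stdbool.h>

    // true if the closed index ranges [alo, ahi] and [mlo, mhi] cannot intersect,
    // in which case A(:,k) contributes nothing to the masked column C(:,j)
    static inline bool ranges_disjoint (int64_t alo, int64_t ahi,
                                        int64_t mlo, int64_t mhi)
    {
        return (ahi < mlo || alo > mhi) ;
    }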
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_int32.c new file mode 100644 index 0000000000..294d11a8ea --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_int32 +// A'*B function: GB_AdotB__times_max_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
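+    // [Editorial note, not generated by the axb*.m scripts: a small worked
+    // example of the (times, max) semiring this kernel hard-codes.]  The
+    // multiply op is t = IMAX (aki,bkj) and the additive monoid is times with
+    // identity 1, so a dot product over two shared row indices k1 and k2 is
+    //
+    //      cij = IMAX (A(k1,i), B(k1,j)) * IMAX (A(k2,i), B(k2,j))
+    //
+    // For instance, A(k1,i)=2, B(k1,j)=5, A(k2,i)=7, B(k2,j)=3 gives
+    // IMAX(2,5) * IMAX(7,3) = 5 * 7 = 35.  Starting from the identity 1 is
+    // what lets the unmasked gather/scatter here, and the dot-product kernel
+    // GB_AdotB__times_max_int32 below, accumulate with an unconditional "*=".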
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_int64.c new file mode 100644 index 0000000000..eeab6fbc27 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_int64 +// A'*B function: GB_AdotB__times_max_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
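+    // [Editorial note, not generated by the axb*.m scripts: an outline of the
+    // unmasked branch below, which is a Gustavson-style saxpy method using
+    // the dense workspace w; the pattern Cp/Ci is assumed to have been built
+    // already by GB_AxB_symbolic.]
+    //
+    //      for each column j:
+    //          w [Ci [p]] = 1 for p in Cp [j] .. Cp [j+1]-1      (clear w)
+    //          for each entry B(k,j):
+    //              for each entry A(i,k):
+    //                  w [i] *= IMAX (A(i,k), B(k,j))            (accumulate)
+    //          Cx [p] = w [Ci [p]] for p in Cp [j] .. Cp [j+1]-1 (gather)
+    //
+    // The dense workspace makes the cost of each column roughly proportional
+    // to the number of products plus the number of entries in C(:,j), with no
+    // searching needed for scattered updates.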
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_int8.c new file mode 100644 index 0000000000..f25c6bdfd1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_int8 +// A'*B function: GB_AdotB__times_max_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
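+    // [Editorial note, not generated by the axb*.m scripts: how the masked
+    // branch below uses the int8 Flag workspace for the current column j.]
+    //
+    //      Flag [i] == 0 : row i is not in Mask(:,j); the update is skipped
+    //      Flag [i]  > 0 : row i is in the mask but C(i,j) has no value yet;
+    //                      the first product is stored (w [i] = t) and the
+    //                      flag is set to -1
+    //      Flag [i]  < 0 : C(i,j) already exists, so w [i] *= t accumulates
+    //
+    // When WITH_ZOMBIES is defined, C keeps the full mask pattern and any
+    // masked entry that never received a product becomes a zombie, marked
+    // with Ci [p] = FLIP (i) and counted in C->nzombies.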
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
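+// [Editorial note, not generated by the axb*.m scripts:] everything above is
+// guarded by the "#ifndef GBCOMPACT" at the top of this file, so a build that
+// defines GBCOMPACT (e.g. by adding -DGBCOMPACT to the compile flags) omits
+// these hard-coded semiring kernels; the generic code path then handles this
+// semiring, presumably trading speed for a much smaller library.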
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint16.c new file mode 100644 index 0000000000..1eea9fda1c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_uint16 +// A'*B function: GB_AdotB__times_max_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
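+    // [Editorial note, not generated by the axb*.m scripts:] the "flip"
+    // argument records that the caller may have swapped A and B, which only
+    // matters when the multiply operator is not commutative.  The multiplier
+    // here is IMAX, which is symmetric in its arguments, so flip is accepted
+    // for a uniform interface but never consulted in this generated kernel.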
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint32.c new file mode 100644 index 0000000000..5a5fa7b5be --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_uint32 +// A'*B function: GB_AdotB__times_max_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
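+    // [Editorial note, not generated by the axb*.m scripts:] the companion
+    // dot-product kernel, GB_AdotB__times_max_uint32 below, picks one of six
+    // cases for each C(i,j): both A(:,i) and B(:,j) dense (a direct loop over
+    // all rows); one dense and the other sparse (two symmetric cases, looping
+    // over the sparse column only); one column more than 32 times denser than
+    // the other (two symmetric cases, scanning the sparse list linearly and
+    // using GB_BINARY_TRIM_SEARCH to skip runs in the denser one); and
+    // otherwise a plain two-pointer merge.  The factor of 32 appears to be a
+    // heuristic threshold fixed in the template.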
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint64.c new file mode 100644 index 0000000000..bc3bc17283 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_uint64 +// A'*B function: GB_AdotB__times_max_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
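+    // In this generated kernel the semiring "multiply" is t = IMAX (aik,bkj)
+    // and the semiring "add" is cij *= t, whose identity is 1 (cij *= 1
+    // leaves cij unchanged).  w is a dense accumulator of size C->nrows that
+    // holds the partial column C(:,j) while it is being computed.  For
+    // example, if A(3,k) = 2 and B(k,j) = 5, the first contribution to w [3]
+    // is IMAX (2,5) = 5 ; a later contribution A(3,k') = 7 with B(k',j) = 4
+    // updates w [3] *= IMAX (7,4), giving 35.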
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_max_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint8.c new file mode 100644 index 0000000000..0c45b38013 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_max_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_max_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_max_uint8 +// A'*B function: GB_AdotB__times_max_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMAX(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
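+    // When a Mask is present, scatter_mask sets Flag [i] nonzero for each
+    // row i of Mask (:,j) whose value casts to true, and Flag [i] is flipped
+    // to -1 the first time C(i,j) is computed.  With WITH_ZOMBIES defined,
+    // C->p is copied from Mask->p, and mask entries that receive no
+    // contribution from A*B become zombies: their row index is stored as
+    // FLIP (i), C->nzombies is incremented, and C is placed in the queue so
+    // the pending zombies can be handled later.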
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMAX(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_max_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMAX(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMAX(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_fp32.c new file mode 100644 index 0000000000..42e82a3833 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_fp32 +// A'*B function: GB_AdotB__times_min_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (FMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
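+    // When no Mask is present, the pattern Cp/Ci has already been computed
+    // by GB_AxB_symbolic, so for each column j the workspace w is first set
+    // to the multiplicative identity 1 at the positions of C(:,j) ("clear
+    // w"), then accumulated with w [i] *= FMIN (aik,bkj), and finally
+    // gathered into Cx.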
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = FMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = FMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_min_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_fp64.c new file mode 100644 index 0000000000..6de290af9b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_fp64 +// A'*B function: GB_AdotB__times_min_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (FMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
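+    // The flip argument records that the caller swapped A and B, but it is
+    // not referenced in this kernel: FMIN (like IMAX in the times_max
+    // kernels) is commutative, so swapping its operands does not change the
+    // result.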
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = FMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = FMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = FMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_int16.c new file mode 100644 index 0000000000..9cbaaf188f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_int16 +// A'*B function: GB_AdotB__times_min_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
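+    // In the masked phase below, empty (Ap, Ai, k, &alo, &ahi) returns true
+    // when A(:,k) has no entries and otherwise reports the range [alo,ahi]
+    // of row indices present in that column; the test (ahi < mlo || alo >
+    // mhi) then skips A(:,k) entirely when that range cannot overlap the
+    // rows mlo..mhi present in Mask (:,j).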
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_int32.c new file mode 100644 index 0000000000..b65f41e4b7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_int32 +// A'*B function: GB_AdotB__times_min_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
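+ // A brief sketch of what the loops below compute with this semiring: the
+ // "multiply" is IMIN and the "add" is integer times (identity 1), so
+ // C(i,j) is the product of IMIN (A(i,k), B(k,j)) over all k present in
+ // both A(:,k) and B(:,j) (and, when a Mask is given, only where Mask (i,j)
+ // is true).  As a hypothetical numeric example: if B(1,j) = 4, B(3,j) = 2,
+ // A(i,1) = 3, and A(i,3) = 5, then C(i,j) = IMIN (3,4) * IMIN (5,2)
+ // = 3 * 2 = 6.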
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_int64.c new file mode 100644 index 0000000000..4d5edac005 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_int64 +// A'*B function: GB_AdotB__times_min_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
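+ // The two branches below differ only in how the pattern of C is handled.
+ // With a Mask and WITH_ZOMBIES defined, C takes the pattern of the Mask
+ // itself: a mask entry that receives a contribution from A*B becomes a
+ // live entry, and a mask entry that receives none becomes a zombie (its
+ // row index is stored as FLIP (i) and C->nzombies is incremented), with C
+ // placed on the queue via GB_queue_insert so the zombies can be removed
+ // later.  Without WITH_ZOMBIES, Cp is built as the columns are computed
+ // and only the live entries are kept.  Without a Mask, the pattern Cp/Ci
+ // has already been computed by GB_AxB_symbolic and only the values are
+ // filled in here.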
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_int8.c new file mode 100644 index 0000000000..c6e05ccb0f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_int8 +// A'*B function: GB_AdotB__times_min_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
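+ // The int8_t Flag workspace used in the masked branch encodes the state
+ // of each row i within the current column j: Flag [i] == 0 means
+ // Mask (i,j) is not present (or is zero) and the contribution is skipped;
+ // Flag [i] > 0 means Mask (i,j) is true but C(i,j) has not yet been
+ // assigned, so the first contribution sets w [i] = t and flips Flag [i]
+ // to -1; Flag [i] < 0 means C(i,j) already holds a value in w, so later
+ // contributions are folded in with w [i] *= t.  scatter_mask fills in the
+ // positive entries for column j the first time they are needed, and the
+ // gather step resets Flag back to zero.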
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in 
the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, 
&ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint16.c new file mode 100644 index 0000000000..fa7add0f1f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_uint16 +// A'*B function: GB_AdotB__times_min_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
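+ // In the unmasked branch the pattern Cp/Ci of each column was found by
+ // the symbolic phase, so the loops below only touch workspace entries
+ // that belong to C(:,j): w is first reset to the semiring identity (1) at
+ // those positions, each B(k,j) then scatters IMIN (A(i,k), B(k,j)) into w
+ // with the times "add", and finally w is gathered back into Cx in pattern
+ // order.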
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint32.c new file mode 100644 index 0000000000..e0c1aa25d8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_uint32 +// A'*B function: GB_AdotB__times_min_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
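The header comment above defines the TIMES_MIN semiring only in operator form (multiply t = IMIN(aik,bkj), add cij *= t, identity 1). The standalone sketch below is illustrative only and is not part of the generated kernel: it shows those scalar semantics on two hypothetical dense columns, assuming IMIN is the usual two-argument minimum.

#include <stdint.h>
#include <stdio.h>

#define IMIN(x,y) (((x) < (y)) ? (x) : (y))

int main (void)
{
    // hypothetical columns A(:,i) and B(:,j), both dense in rows 0..2
    uint32_t Acol [3] = { 3, 5, 7 } ;
    uint32_t Bcol [3] = { 4, 2, 9 } ;
    uint32_t cij = 1 ;                              // additive identity of TIMES
    for (int k = 0 ; k < 3 ; k++)
    {
        uint32_t t = IMIN (Acol [k], Bcol [k]) ;    // "multiply": minimum
        cij *= t ;                                  // "add": running product
    }
    printf ("cij = %u\n", (unsigned) cij) ;         // prints 42 (= 3 * 2 * 7)
    return (0) ;
}

With these toy values the result is IMIN(3,4) * IMIN(5,2) * IMIN(7,9) = 3 * 2 * 7 = 42, which is exactly the entry-by-entry update performed by the kernels in this file.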
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint64.c new file mode 100644 index 0000000000..6217e7e33a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_uint64 +// A'*B function: GB_AdotB__times_min_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
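Both the masked and unmasked outer-product loops in these kernels follow the same scatter/accumulate/gather pattern: a dense workspace w is initialized over the pattern of C(:,j) with the additive identity, updated in place as B(k,j) and the entries of A(:,k) are scanned, and finally gathered back into the packed column of C. The sketch below shows only that pattern, with hypothetical toy arrays (Ci_col, Cx_col); it does not use the library's internal matrix representation.

#include <stdint.h>
#include <stdio.h>

int main (void)
{
    uint64_t w [4] ;                        // dense workspace, one slot per row
    int64_t  Ci_col [2] = { 1, 3 } ;        // pattern of C(:,j): rows 1 and 3
    uint64_t Cx_col [2] ;                   // packed values of C(:,j)

    // scatter: clear w over the pattern of C(:,j) with the identity 1
    for (int p = 0 ; p < 2 ; p++) w [Ci_col [p]] = 1 ;

    // accumulate: two hypothetical terms t reach row 1, one reaches row 3
    w [1] *= 5 ;
    w [1] *= 2 ;                            // w [1] is now 10
    w [3] *= 7 ;                            // w [3] is now 7

    // gather: copy the workspace back into the packed column of C(:,j)
    for (int p = 0 ; p < 2 ; p++) Cx_col [p] = w [Ci_col [p]] ;

    printf ("C(1,j) = %llu, C(3,j) = %llu\n",
        (unsigned long long) Cx_col [0], (unsigned long long) Cx_col [1]) ;
    return (0) ;
}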
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + 
// place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, 
ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end 
of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_min_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint8.c new file mode 100644 index 0000000000..4d1c9649c1 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_min_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_min_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_min_uint8 +// A'*B function: GB_AdotB__times_min_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (IMIN(aik,bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
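The GB_AdotB__* dot-product kernels choose a strategy per C(i,j): when both columns are dense they iterate over all rows, when only one is dense they iterate over the sparse one, when one pattern is more than 32 times sparser they advance through the denser one with GB_BINARY_TRIM_SEARCH, and otherwise they use a plain two-pointer merge. The sketch below illustrates only the balanced two-pointer case under the TIMES_MIN semiring, with hypothetical toy columns (Ai_col/Ax_col, Bi_col/Bx_col) rather than the library's data structures.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define IMIN(x,y) (((x) < (y)) ? (x) : (y))

int main (void)
{
    // hypothetical sparse columns, row indices sorted ascending
    int64_t Ai_col [3] = { 0, 2, 5 } ;  uint8_t Ax_col [3] = { 9, 4, 6 } ;
    int64_t Bi_col [3] = { 2, 3, 5 } ;  uint8_t Bx_col [3] = { 7, 8, 5 } ;

    uint8_t cij = 0 ;
    bool cij_exists = false ;
    int pa = 0, pb = 0 ;
    while (pa < 3 && pb < 3)
    {
        if      (Ai_col [pa] < Bi_col [pb]) pa++ ;   // A entry has no partner
        else if (Bi_col [pb] < Ai_col [pa]) pb++ ;   // B entry has no partner
        else
        {
            // row indices match: apply the semiring, as in the MERGE macro
            uint8_t t = IMIN (Ax_col [pa], Bx_col [pb]) ;
            cij = cij_exists ? (uint8_t) (cij * t) : t ;
            cij_exists = true ;
            pa++ ;
            pb++ ;
        }
    }
    if (cij_exists) printf ("cij = %u\n", cij) ;     // rows 2 and 5: 4 * 5 = 20
    return (0) ;
}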
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place 
C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = IMIN(aik,bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_min_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = IMIN(aki,bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, 
&pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = IMIN(aki,bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz 
; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_fp32.c new file mode 100644 index 0000000000..1509bb5879 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_fp32 +// A'*B function: GB_AdotB__times_minus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
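Unlike MIN, the MINUS multiply operator used by the times_minus kernels that follow is not commutative, so the generated code consults the flip argument (true when the caller swapped A and B) and computes bkj-aik instead of aik-bkj. Below is a minimal sketch of that convention with made-up values and a hypothetical helper name, times_minus_term; it is not part of the library.

#include <stdbool.h>
#include <stdio.h>

// hypothetical helper: the multiply operator of the TIMES_MINUS_FP32
// semiring, honoring the flip convention used by the generated kernels
static float times_minus_term (float aik, float bkj, bool flip)
{
    return (flip ? (bkj - aik) : (aik - bkj)) ;
}

int main (void)
{
    float aik = 3.0f, bkj = 7.5f ;
    // original operand order
    printf ("not flipped: %g\n", times_minus_term (aik, bkj, false)) ;
    // caller swapped A and B, so the kernel sees the operands reversed
    printf ("flipped:     %g\n", times_minus_term (bkj, aik, true)) ;
    return (0) ;
}

Both calls print -4.5: the flipped form recovers the same term, which is what lets the caller swap the operands without changing the result of the running product.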
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_fp64.c new file mode 100644 index 0000000000..2c20af360e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_fp64: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_fp64 +// A'*B function: GB_AdotB__times_minus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int16.c new file mode 100644 index 0000000000..599695e417 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_int16: hard-coded C=A*B 
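
For readers scanning these generated kernels: the "times_minus" family replaces the conventional plus-times semiring, using MINUS as the multiplicative operator and TIMES (with identity 1) as the additive monoid, exactly as the header comments state. A minimal standalone sketch of what a single dot-product entry reduces to under that semiring (illustrative only; nothing below is part of the generated file):

#include <stdio.h>

/* illustrative only: cij = product over k of (a [k] - b [k]), i.e. TIMES is
   the "add" (a monoid with identity 1) and MINUS is the "multiply" */
static double times_minus_dot (const double *a, const double *b, int n)
{
    double cij = 1 ;                    /* identity of the TIMES monoid */
    for (int k = 0 ; k < n ; k++)
    {
        double t = a [k] - b [k] ;      /* "multiply": aik - bkj */
        cij *= t ;                      /* "add": cij *= t */
    }
    return (cij) ;
}

int main (void)
{
    double a [3] = { 4, 5, 6 } ;
    double b [3] = { 1, 2, 3 } ;
    printf ("%g\n", times_minus_dot (a, b, 3)) ;    /* (4-1)*(5-2)*(6-3) = 27 */
    return (0) ;
}
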
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_int16 +// A'*B function: GB_AdotB__times_minus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int32.c new file mode 100644 index 0000000000..e5ec21daee --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_int32: hard-coded C=A*B 
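
The masked branch of each outer-product kernel follows the same scatter/accumulate/gather pattern: the pattern of Mask(:,j) is scattered into the byte Flag workspace, contributions A(i,k)*B(k,j) are accepted only where Flag is nonzero, and C(:,j) is then gathered back from the mask pattern (untouched mask entries are the ones the WITH_ZOMBIES variant keeps as zombies). A self-contained sketch of that pattern for one column, with hypothetical names and double standing in for the generated type:

#include <stdint.h>

/* illustrative only: one masked column update -- scatter the mask pattern
   into a byte flag array, accept contributions only where the mask has an
   entry, then gather the live entries.  Names and types are hypothetical. */
static int64_t masked_column
(
    int64_t *ci, double *cx,            /* output: live entries of C(:,j) */
    double *w, int8_t *flag,            /* dense workspaces, flag all zero */
    const int64_t *maski, int64_t mnz,  /* pattern of Mask(:,j) */
    const int64_t *xi, const double *xx, int64_t xnz   /* sparse update */
)
{
    /* scatter: flag [i] = 1 for every i in the mask pattern */
    for (int64_t p = 0 ; p < mnz ; p++) flag [maski [p]] = 1 ;

    /* accumulate, but only where the mask allows it */
    for (int64_t p = 0 ; p < xnz ; p++)
    {
        int64_t i = xi [p] ;
        if (flag [i] == 0) continue ;            /* not in the mask: skip */
        if (flag [i] > 0) { w [i] = xx [p] ; flag [i] = -1 ; }
        else              { w [i] *= xx [p] ; }  /* TIMES monoid */
    }

    /* gather C(:,j) from the mask pattern and clear the flag workspace;
       mask entries never touched are dropped here, or kept as zombies in
       the WITH_ZOMBIES variant of the kernels */
    int64_t cnz = 0 ;
    for (int64_t p = 0 ; p < mnz ; p++)
    {
        int64_t i = maski [p] ;
        if (flag [i] < 0) { ci [cnz] = i ; cx [cnz++] = w [i] ; }
        flag [i] = 0 ;
    }
    return (cnz) ;
}
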
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_int32 +// A'*B function: GB_AdotB__times_minus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int64.c new file mode 100644 index 0000000000..4f26d97da7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_int64: hard-coded C=A*B 
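
The A'*B kernels choose one of several inner loops per entry, depending on whether A(:,i) and B(:,j) are dense or sparse. The simplest non-trivial case, a dense A(:,i) against a sparse B(:,j), indexes A directly by the row indices of B. A hedged sketch of that case under this semiring (the names are illustrative, not the library's):

#include <stdint.h>
#include <stdio.h>

/* illustrative only: one entry cij = A(:,i)'*B(:,j) when A(:,i) is dense
   (entry k stored at ax [k]) and B(:,j) is sparse, under the times-minus
   semiring; mirrors the "ainz == nrows" case in the kernels */
static double dot_dense_sparse
(
    const double *ax,                   /* ax [k] = A(k,i), dense */
    const int64_t *bi, const double *bx, int64_t bjnz  /* sparse B(:,j) */
)
{
    double cij = 1 ;                    /* TIMES monoid identity */
    for (int64_t p = 0 ; p < bjnz ; p++)
    {
        cij *= ax [bi [p]] - bx [p] ;   /* fold in (A(k,i) - B(k,j)) */
    }
    return (cij) ;
}

int main (void)
{
    double  ax [4] = { 9, 8, 7, 6 } ;
    int64_t bi [2] = { 1, 3 } ;         /* B(1,j) = 2, B(3,j) = 5 */
    double  bx [2] = { 2, 5 } ;
    printf ("%g\n", dot_dense_sparse (ax, bi, bx, 2)) ;  /* (8-2)*(6-5) = 6 */
    return (0) ;
}
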
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_int64 +// A'*B function: GB_AdotB__times_minus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int8.c new file mode 100644 index 0000000000..674a8925d2 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_int8: hard-coded C=A*B 
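
When both columns are sparse, the kernels merge the two sorted index lists with two pointers, and once one list has more than 32 times the entries of the other they skip ahead in the denser list with a binary "trim" search instead of stepping one entry at a time. The sketch below shows that skipping strategy with an ordinary binary search; it is not the library's GB_BINARY_TRIM_SEARCH, just an illustration of the idea:

#include <stdint.h>

/* illustrative only: count common indices of two strictly increasing lists,
   galloping ahead in the first list via binary search when it falls behind */
static int64_t intersect_count
(
    const int64_t *ai, int64_t anz,     /* indices of A(:,i), ascending */
    const int64_t *bi, int64_t bnz      /* indices of B(:,j), ascending */
)
{
    int64_t matches = 0, pa = 0, pb = 0 ;
    while (pa < anz && pb < bnz)
    {
        int64_t ia = ai [pa], ib = bi [pb] ;
        if (ia < ib)
        {
            /* advance pa to the first entry >= ib via binary search */
            int64_t lo = pa + 1, hi = anz ;
            while (lo < hi)
            {
                int64_t mid = lo + (hi - lo) / 2 ;
                if (ai [mid] < ib) lo = mid + 1 ; else hi = mid ;
            }
            pa = lo ;
        }
        else if (ib < ia)
        {
            pb++ ;                      /* or a symmetric binary search */
        }
        else
        {
            matches++ ;                 /* ia == ib: a pair to merge */
            pa++ ; pb++ ;
        }
    }
    return (matches) ;
}
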
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_int8 +// A'*B function: GB_AdotB__times_minus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + 
scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint16.c new file mode 100644 index 0000000000..d7fa857a66 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_uint16: hard-coded C=A*B 
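
Finally, the flip argument threaded through every kernel records that the caller swapped A and B before dispatching here; because MINUS is not commutative, the innermost multiply swaps the operands back, t = flip ? (bkj-aik) : (aik-bkj). A two-line illustration with a hypothetical helper (not library code):

#include <stdio.h>
#include <stdbool.h>

/* illustrative only: undo an earlier operand swap at the innermost multiply */
static double multiply (double aik, double bkj, bool flip)
{
    return (flip ? (bkj - aik) : (aik - bkj)) ;
}

int main (void)
{
    /* both calls compute A(i,k) - B(k,j) = 5 - 2 = 3; the second simulates a
       caller that passed the operands in swapped order with flip = true */
    printf ("%g %g\n", multiply (5, 2, false), multiply (2, 5, true)) ;
    return (0) ;
}
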
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_uint16 +// A'*B function: GB_AdotB__times_minus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if 
not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint32.c new file mode 100644 index 0000000000..ba6bec1e5f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_uint32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_uint32 +// A'*B function: GB_AdotB__times_minus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if 
not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint64.c new file mode 100644 index 0000000000..c528b36e7c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_uint64: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_uint64 +// A'*B function: GB_AdotB__times_minus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if 
not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint8.c new file mode 100644 index 0000000000..dbe27f9e5c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_minus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_minus_uint8: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_minus_uint8 +// A'*B function: GB_AdotB__times_minus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (flip ? (bkj-aik) : (aik-bkj)) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet 
done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? (bkj-aik) : (aik-bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = flip ? 
(bkj-aik) : (aik-bkj) ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_minus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? 
(bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = flip ? (bkj-aki) : (aki-bkj) ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_fp32.c new file mode 100644 index 0000000000..bdff69cd11 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_fp32: hard-coded C=A*B 
+//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_fp32 +// A'*B function: GB_AdotB__times_plus_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, 
Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t 
msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ 
; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_fp64.c new file mode 100644 index 0000000000..f217a2866a --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_fp64 +// A'*B function: GB_AdotB__times_plus_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
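+// Note (illustration only, not part of the generated template): in this
+// "times_plus" semiring the usual roles are swapped -- the multiplicative
+// op is PLUS, t = aik + bkj, and the additive monoid is TIMES with
+// identity 1, cij *= t.  A small worked example, assuming two
+// contributions to the same entry C(i,j):
+//     aik = 2, bkj = 3  ->  t = 5,  cij = 1 * 5 = 5
+//     aik = 1, bkj = 4  ->  t = 5,  cij = 5 * 5 = 25
+// so C(i,j) is the product of (A(i,k) + B(k,j)) over all k where both
+// entries are present.  The workspace w declared below accumulates these
+// per-column products.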
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_plus_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int16.c new file mode 100644 index 0000000000..a268487677 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_int16 +// A'*B function: GB_AdotB__times_plus_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
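+// Note (illustration only, not part of the generated template): when Mask
+// is non-NULL, the loop below proceeds column by column: scatter_mask
+// marks Mask(:,j) in the Flag workspace, each t = aik + bkj is folded into
+// w only where Flag is nonzero (Flag flips from +1 to -1 the first time
+// C(i,j) receives a value), and C(:,j) is then gathered back from the
+// pattern of Mask(:,j).  With WITH_ZOMBIES defined, mask entries that
+// never received a value are kept as zombies (row index FLIP'd and the
+// zombie count incremented) rather than being compacted out.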
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int32.c new file mode 100644 index 0000000000..02d574d3bb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_int32 +// A'*B function: GB_AdotB__times_plus_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
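+// Note (illustration only, not part of the generated template): in the
+// unmasked branch below, the pattern of C has already been computed by
+// GB_AxB_symbolic, so only a numeric pass is needed per column: w is set
+// to the additive identity (1) over the pattern of C(:,j), updated with
+// w [i] *= (aik + bkj) for every contribution from B(:,j), and then
+// gathered back into Cx in pattern order.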
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int64.c new file mode 100644 index 0000000000..cedbf6a703 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_int64 +// A'*B function: GB_AdotB__times_plus_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
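+// Note (illustration only, not part of the generated template): the
+// companion dot-product kernel GB_AdotB__times_plus_int64, defined later
+// in this file, computes each C(i,j) = A(:,i)'*B(:,j) and selects a merge
+// strategy per entry: both vectors dense, one dense and one sparse (two
+// symmetric cases), a trimmed binary search (GB_BINARY_TRIM_SEARCH) when
+// one vector has more than 32 times the entries of the other, and an
+// ordinary two-pointer merge when the sparsities are comparable.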
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int8.c new file mode 100644 index 0000000000..527c70d6bd --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_int8 +// A'*B function: GB_AdotB__times_plus_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
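+// Note (illustration only, not part of the generated template): the flip
+// argument is never referenced in these times_plus kernels; since the
+// PLUS "multiply" operator is commutative (aik + bkj == bkj + aik),
+// swapping A and B cannot change the result, so the generated code can
+// safely ignore it.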
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint16.c new file mode 100644 index 0000000000..fce692bb76 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_uint16 +// A'*B function: GB_AdotB__times_plus_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
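+    // Note: the companion dot-product kernel GB_AdotB__times_plus_uint16
+    // below computes each C(i,j) = A(:,i)'*B(:,j) with one of a few
+    // strategies chosen from the column sparsities: if either column is
+    // dense it indexes the dense one directly; if one column has more than
+    // 32 times the entries of the other it walks the sparser column and
+    // advances through the denser one with GB_BINARY_TRIM_SEARCH; otherwise
+    // it uses a plain two-pointer merge (the MERGE macro).
+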
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint32.c new file mode 100644 index 0000000000..1abac21ed8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_uint32 +// A'*B function: GB_AdotB__times_plus_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
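+    // Note: with an unsigned type such as uint32_t, both the semiring
+    // "multiply" (aik + bkj) and "add" (cij *= t) wrap around modulo 2^32,
+    // as usual for unsigned arithmetic in C, so large products overflow
+    // silently rather than saturating.
+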
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint64.c new file mode 100644 index 0000000000..ae13c6c2ff --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_uint64 +// A'*B function: GB_AdotB__times_plus_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
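+    // Note: w is a dense workspace of length C->nrows drawn from
+    // GB_thread_local.Work.  It is never cleared in full: the masked phase
+    // tracks live positions with the Flag array, and the unmasked phase
+    // resets only the positions in the pattern of C(:,j) to the identity
+    // value 1 before accumulating into them.
+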
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint8.c new file mode 100644 index 0000000000..6554ae0ee9 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_plus_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_plus_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_plus_uint8 +// A'*B function: GB_AdotB__times_plus_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik + bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
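+    // Note: when WITH_ZOMBIES is defined, C takes the pattern of the Mask
+    // directly (Maskp is copied into C->p); Mask positions that receive no
+    // contribution from A*B are stored as zombies with a flipped row index
+    // (FLIP (i)) and C is handed to GB_queue_insert, presumably so the
+    // pending zombies can be removed later.  Without WITH_ZOMBIES, only the
+    // live entries are gathered and Cp is built incrementally from cnz.
+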
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik + bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_plus_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki + bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki + bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_second_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_fp32.c new file mode 100644 index 0000000000..d7fb4bdc2b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_second_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_second_fp32 +// A'*B function: GB_AdotB__times_second_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
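+ // Descriptive note: w is a gather/scatter workspace with one entry per row
+ // of C; it accumulates a single column C(:,j) at a time.  When a Mask is
+ // present, the int8_t Flag workspace marks which rows Mask(:,j) permits:
+ // Flag [i] > 0 means C(i,j) is allowed but not yet computed, and
+ // Flag [i] < 0 means w [i] already holds a partial value for C(i,j).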
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it 
has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and 
A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_second_fp64.c 
b/GraphBLAS/Source/Generated/GB_AxB__times_second_fp64.c new file mode 100644 index 0000000000..d66bc8a46b --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_second_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_second_fp64 +// A'*B function: GB_AdotB__times_second_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
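+ // Descriptive note: with the TIMES_SECOND semiring the multiplier ignores
+ // A(i,k); each product contributes t = bkj only, and products are combined
+ // with the TIMES monoid (w [i] *= t, identity 1).  For example (values are
+ // illustrative only), if B(2,j) = 3 and B(5,j) = 4 and A(7,2) and A(7,5)
+ // are both present, then w [7] accumulates 3 * 4 = 12, regardless of the
+ // numerical values in A.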
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_second_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_int16.c new file mode 100644 index 0000000000..ab18028a6c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_second_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_second_int16 +// A'*B function: GB_AdotB__times_second_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
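+ // Descriptive note: two cases follow.  If a Mask is given, C(:,j) is
+ // computed only at positions present in Mask(:,j) (under WITH_ZOMBIES the
+ // full Mask pattern is kept and entries not in A*B become zombies).
+ // Otherwise, the pattern of C has already been computed by GB_AxB_symbolic
+ // and only the numerical values are computed here.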
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_second_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_int32.c new file mode 100644 index 0000000000..85c584f5c8 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_second_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_second_int32 +// A'*B function: GB_AdotB__times_second_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
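+ // Descriptive note: in the unmasked case below, "clearing" w means setting
+ // its entries in the pattern of C(:,j) to 1, the identity of the TIMES
+ // monoid, so that the first update w [i] *= t leaves w [i] equal to t.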
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_second_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_int64.c new file mode 100644 index 0000000000..3da4018ba3 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_second_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_second_int64 +// A'*B function: GB_AdotB__times_second_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
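+ // Descriptive note: under WITH_ZOMBIES, C->p is copied from Maskp, and any
+ // Mask entry that does not appear in A*B is kept as a zombie: its row index
+ // is stored as FLIP (i) and C->nzombies is incremented.  GB_queue_insert (C)
+ // then places C in the queue because it has pending work.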
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_second_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_int8.c new file mode 100644 index 0000000000..1dd8f4f96e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_int8.c @@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_second_int8: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_second_int8
+// A'*B function: GB_AdotB__times_second_int8
+// Z type : int8_t (the type of C)
+// XY type: int8_t (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_second_int8
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
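+ // In this semiring the multiply operator is SECOND, so each product
+ // A(i,k)*B(k,j) contributes only the value bkj, and the add operator is
+ // TIMES, so all contributions to a given C(i,j) are combined by
+ // multiplication (the identity of TIMES is 1).  For example, if A(i,k1)
+ // and A(i,k2) are present and B(k1,j)=b1 and B(k2,j)=b2, then
+ // C(i,j) = b1*b2, regardless of the values stored in A.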
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue 
if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_second_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint16.c new file mode 100644 index 0000000000..b3d8275e38 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint16.c @@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_second_uint16: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_second_uint16
+// A'*B function: GB_AdotB__times_second_uint16
+// Z type : uint16_t (the type of C)
+// XY type: uint16_t (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_second_uint16
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
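+ // Two cases are handled below: if a Mask is present, the pattern of C is
+ // taken from the Mask, and any Mask entry that does not appear in A*B
+ // either becomes a zombie (WITH_ZOMBIES) or is dropped while the column is
+ // compacted; otherwise the pattern of C has already been computed by
+ // GB_AxB_symbolic and only the numerical values are computed here.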
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_second_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint32.c new file mode 100644 index 0000000000..7c972cc838 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint32.c @@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_second_uint32: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_second_uint32
+// A'*B function: GB_AdotB__times_second_uint32
+// Z type : uint32_t (the type of C)
+// XY type: uint32_t (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_second_uint32
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
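+ // The outer-product method computes C one column at a time: for each entry
+ // B(k,j) it scans the column A(:,k) and folds the partial results into the
+ // dense workspace w (indexed by row), then gathers the finished column
+ // C(:,j) out of w using either the Mask pattern or the precomputed pattern.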
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
+#endif
diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_second_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint64.c new file mode 100644 index 0000000000..853609e2d7 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint64.c @@ -0,0 +1,513 @@
+//------------------------------------------------------------------------------
+// GB_AxB__times_second_uint64: hard-coded C=A*B
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
+
+// If this filename has a double underscore in its name ("__") then it has
+// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m
+// scripts, and should not be edited. Edit the original source file instead.
+
+//------------------------------------------------------------------------------
+
+#include "GB.h"
+#ifndef GBCOMPACT
+#include "GB_AxB_methods.h"
+
+// The C=A*B semiring is defined by the following types and operators:
+
+// A*B function: GB_AxB__times_second_uint64
+// A'*B function: GB_AdotB__times_second_uint64
+// Z type : uint64_t (the type of C)
+// XY type: uint64_t (the type of A and B)
+// Identity: 1 (where cij *= 1 does not change cij)
+// Multiply: t = (bkj)
+// Add: cij *= t
+
+//------------------------------------------------------------------------------
+// C=A*B and C<Mask>=A*B: outer product
+//------------------------------------------------------------------------------
+
+void GB_AxB__times_second_uint64
+(
+ GrB_Matrix C,
+ const GrB_Matrix Mask,
+ const GrB_Matrix A,
+ const GrB_Matrix B,
+ bool flip // if true, A and B have been swapped
+)
+{
+
+ //--------------------------------------------------------------------------
+ // get A, B, and C
+ //--------------------------------------------------------------------------
+
+ // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
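+ // The Flag workspace drives the masked case: Flag [i] == 0 means row i is
+ // not set in Mask(:,j), so it is skipped; Flag [i] > 0 means Mask(i,j) is
+ // set but no value has been computed yet, so the first value is stored in
+ // w [i] and Flag [i] is set to -1; Flag [i] < 0 means w [i] already holds
+ // a partial result, which is then updated in place.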
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + 
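+ // Note on the dot-product method above: for each candidate C(i,j) it picks
+ // one of five strategies based on the entry counts of A(:,i) and B(:,j):
+ // both dense, only A(:,i) dense, only B(:,j) dense, one vector more than 32
+ // times sparser than the other (in which case GB_BINARY_TRIM_SEARCH skips
+ // ahead in the denser vector), or comparable sparsity (a linear merge).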
+#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_second_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint8.c new file mode 100644 index 0000000000..bf6794ee9e --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_second_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_second_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_second_uint8 +// A'*B function: GB_AdotB__times_second_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_second_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_times_fp32.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_fp32.c new file mode 100644 index 0000000000..56d3ca123f --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_fp32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_fp32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_fp32 +// A'*B function: GB_AdotB__times_times_fp32 +// Z type : float (the type of C) +// XY type: float (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
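    // [editorial note] summary of how the code below uses its workspaces; this
    // restates what the kernel already does and adds no new behavior:
    //
    // * w (GB_thread_local.Work) is a dense accumulator of length C->nrows,
    //   indexed by row i, holding the partial TIMES reduction for C(i,j) while
    //   column j is being computed.
    //
    // * With a Mask: the pattern of C(:,j) is taken from Mask(:,j).  Flag [i]
    //   is set positive for rows permitted by the mask (scatter_mask); the
    //   first accepted product stores t into w [i] and sets Flag [i] = -1, and
    //   later products do w [i] *= t.  When WITH_ZOMBIES is defined, mask
    //   entries never reached by A*B are kept as zombies (Ci [p] = FLIP (i))
    //   and C is placed on the queue (GB_queue_insert) so they can be deleted
    //   later; otherwise only live entries are compacted into C via cnz.
    //
    // * Without a Mask: the pattern of C was computed beforehand by
    //   GB_AxB_symbolic, so "clear w" simply resets w to 1 (the identity of
    //   the TIMES monoid) over that pattern before products are accumulated.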
+ float *restrict w = GB_thread_local.Work ; + + float *restrict Cx = C->x ; + const float *restrict Ax = A->x ; + const float *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + float aik = Ax [pa] ; + float t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if 
it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + float bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + float aik = Ax [pa] ; + float t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_fp32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + float aki = Ax [pa++] ; /* aki = A(k,i) */ \ + float bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + float t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + float *Cx = C->x ; + const float *Ax = A->x ; + const float *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + float cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // 
B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + float aki = Ax [pa + k] ; // aki = A(k,i) + float bkj = Bx [pb] ; // bkj = B(k,j) + float t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + float aki = Ax [pa] ; // aki = A(k,i) + float bkj = Bx [pb + k] ; // bkj = B(k,j) + float t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_times_fp64.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_fp64.c new file mode 100644 index 0000000000..1baa528b19 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_fp64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_fp64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_fp64 +// A'*B function: GB_AdotB__times_times_fp64 +// Z type : double (the type of C) +// XY type: double (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ double *restrict w = GB_thread_local.Work ; + + double *restrict Cx = C->x ; + const double *restrict Ax = A->x ; + const double *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + double aik = Ax [pa] ; + double t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + double bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + double aik = Ax [pa] ; + double t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_fp64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + double aki = Ax [pa++] ; /* aki = A(k,i) */ \ + double bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + double t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + double *Cx = C->x ; + const double *Ax = A->x ; + const double *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + double cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + double aki = Ax [pa + k] ; // aki = A(k,i) + double bkj = Bx [pb] ; // bkj = B(k,j) + double t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + double aki = Ax [pa] ; // aki = A(k,i) + double bkj = Bx [pb + k] ; // bkj = B(k,j) + double t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_times_int16.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_int16.c new file mode 100644 index 0000000000..e7007e9afb --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_int16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_int16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_int16 +// A'*B function: GB_AdotB__times_times_int16 +// Z type : int16_t (the type of C) +// XY type: int16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int16_t *restrict w = GB_thread_local.Work ; + + int16_t *restrict Cx = C->x ; + const int16_t *restrict Ax = A->x ; + const int16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int16_t aik = Ax [pa] ; + int16_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_int16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int16_t *Cx = C->x ; + const int16_t *Ax = A->x ; + const int16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int16_t aki = Ax [pa + k] ; // aki = A(k,i) + int16_t bkj = Bx [pb] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int16_t aki = Ax [pa] ; // aki = A(k,i) + int16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int16_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_times_int32.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_int32.c new file mode 100644 index 0000000000..b37d0c7f17 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_int32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_int32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_int32 +// A'*B function: GB_AdotB__times_times_int32 +// Z type : int32_t (the type of C) +// XY type: int32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
+ int32_t *restrict w = GB_thread_local.Work ; + + int32_t *restrict Cx = C->x ; + const int32_t *restrict Ax = A->x ; + const int32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int32_t aik = Ax [pa] ; + int32_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_int32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int32_t *Cx = C->x ; + const int32_t *Ax = A->x ; + const int32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int32_t aki = Ax [pa + k] ; // aki = A(k,i) + int32_t bkj = Bx [pb] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int32_t aki = Ax [pa] ; // aki = A(k,i) + int32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int32_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_times_int64.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_int64.c new file mode 100644 index 0000000000..15ec9b7fc5 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_int64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_int64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_int64 +// A'*B function: GB_AdotB__times_times_int64 +// Z type : int64_t (the type of C) +// XY type: int64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
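+
+ // [editorial sketch -- not part of the generated template] The semiring
+ // comments above (Identity 1, Multiply t = aik*bkj, Add cij *= t) can be
+ // checked against a plain dense reference.  The helper below is shown for
+ // exposition only: its names are illustrative, a compilable version would
+ // live at file scope rather than here, and it agrees with the sparse
+ // kernels in this file only when A and B are completely dense, since the
+ // sparse code folds in just the entries present in the pattern.
+
+ static void dense_reference_times_times_int64   // Cd = Ad*Bd, column-major
+ (
+     int64_t *Cd, const int64_t *Ad, const int64_t *Bd,
+     int64_t m, int64_t k, int64_t n             // Ad is m-by-k, Bd is k-by-n
+ )
+ {
+     for (int64_t j = 0 ; j < n ; j++)
+     {
+         for (int64_t i = 0 ; i < m ; i++)
+         {
+             int64_t cij = 1 ;                           // TIMES identity
+             for (int64_t kk = 0 ; kk < k ; kk++)
+             {
+                 int64_t t = Ad [i+kk*m] * Bd [kk+j*k] ; // multiply
+                 cij *= t ;                              // "add" is cij *= t
+             }
+             Cd [i+j*m] = cij ;
+         }
+     }
+ }
+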
+ int64_t *restrict w = GB_thread_local.Work ; + + int64_t *restrict Cx = C->x ; + const int64_t *restrict Ax = A->x ; + const int64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int64_t aik = Ax [pa] ; + int64_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_int64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int64_t *Cx = C->x ; + const int64_t *Ax = A->x ; + const int64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int64_t aki = Ax [pa + k] ; // aki = A(k,i) + int64_t bkj = Bx [pb] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int64_t aki = Ax [pa] ; // aki = A(k,i) + int64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int64_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_times_int8.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_int8.c new file mode 100644 index 0000000000..53ebd9827c --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_int8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_int8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_int8 +// A'*B function: GB_AdotB__times_times_int8 +// Z type : int8_t (the type of C) +// XY type: int8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
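+
+ // [editorial sketch -- not part of the generated template] The dot-product
+ // method GB_AdotB__times_times_int8 later in this file forms each C(i,j) by
+ // merging the sorted row indices of A(:,i) and B(:,j) (the MERGE macro).
+ // The helper below is a self-contained version of that merge over plain
+ // arrays, shown for exposition only; starting cij at the identity 1 is
+ // equivalent to the kernel's first-hit assignment cij = t.
+
+ static bool sparse_dot_times_times_int8         // cij = A(:,i)'*B(:,j)
+ (
+     const int64_t *Alist, const int8_t *Aval, int64_t anz,
+     const int64_t *Blist, const int8_t *Bval, int64_t bnz,
+     int8_t *cij_result
+ )
+ {
+     bool cij_exists = false ;                   // C(i,j) not yet in pattern
+     int8_t cij = 1 ;                            // TIMES identity
+     int64_t pa = 0, pb = 0 ;
+     while (pa < anz && pb < bnz)
+     {
+         if      (Alist [pa] < Blist [pb]) pa++ ;    // A entry has no match
+         else if (Blist [pb] < Alist [pa]) pb++ ;    // B entry has no match
+         else
+         {
+             int8_t t = Aval [pa] * Bval [pb] ;      // multiply matched pair
+             cij *= t ;                              // fold with TIMES
+             cij_exists = true ;
+             pa++ ; pb++ ;
+         }
+     }
+     if (cij_exists) (*cij_result) = cij ;
+     return (cij_exists) ;               // false: C(i,j) stays out of pattern
+ }
+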
+ int8_t *restrict w = GB_thread_local.Work ; + + int8_t *restrict Cx = C->x ; + const int8_t *restrict Ax = A->x ; + const int8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the 
queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + int8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + int8_t aik = Ax [pa] ; + int8_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_int8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + int8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + int8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + int8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + int8_t *Cx = C->x ; + const int8_t *Ax = A->x ; + const int8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + int8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) 
continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + int8_t aki = Ax [pa + k] ; // aki = A(k,i) + int8_t bkj = Bx [pb] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + int8_t aki = Ax [pa] ; // aki = A(k,i) + int8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + int8_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git 
a/GraphBLAS/Source/Generated/GB_AxB__times_times_uint16.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint16.c new file mode 100644 index 0000000000..d548455268 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint16.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_uint16: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_uint16 +// A'*B function: GB_AdotB__times_times_uint16 +// Z type : uint16_t (the type of C) +// XY type: uint16_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
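+
+ // [editorial sketch -- not part of the generated template] The masked
+ // branch below scatters one column of the Mask into the byte Flag array,
+ // accumulates saxpy terms into w only where the Flag is set (a Flag entry
+ // drops to -1 once its w entry becomes live), then gathers the results in
+ // mask order and clears the Flag for the next column.  The helper below
+ // replays that scatter/accumulate/gather pass over plain arrays, treating
+ // the mask as purely structural; it is for exposition only and its names
+ // are illustrative.
+
+ static int64_t masked_scatter_gather_uint16     // one column, Flag all zero
+ (
+     const int64_t *Mlist, int64_t mnz,          // sorted mask row indices
+     const int64_t *Ti, const uint16_t *Tx, int64_t tnz,  // saxpy terms
+     int8_t *Flagw, uint16_t *ww,                // workspaces, size > max row
+     int64_t *Couti, uint16_t *Coutx             // gathered result
+ )
+ {
+     // scatter: mark the rows this column of the mask allows
+     for (int64_t p = 0 ; p < mnz ; p++) Flagw [Mlist [p]] = 1 ;
+     // accumulate: fold each term into ww, but only where the mask is set
+     for (int64_t p = 0 ; p < tnz ; p++)
+     {
+         int64_t i = Ti [p] ;
+         if (Flagw [i] == 0) continue ;          // row not in the mask
+         if (Flagw [i] > 0)
+         {
+             Flagw [i] = -1 ;                    // first hit: w [i] = t
+             ww [i] = Tx [p] ;
+         }
+         else
+         {
+             ww [i] *= Tx [p] ;                  // later hits: w [i] *= t
+         }
+     }
+     // gather in mask order and clear the Flag for reuse
+     int64_t cnz_out = 0 ;
+     for (int64_t p = 0 ; p < mnz ; p++)
+     {
+         int64_t i = Mlist [p] ;
+         if (Flagw [i] < 0)
+         {
+             Coutx [cnz_out] = ww [i] ;
+             Couti [cnz_out++] = i ;
+         }
+         Flagw [i] = 0 ;
+     }
+     return (cnz_out) ;
+ }
+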
+ uint16_t *restrict w = GB_thread_local.Work ; + + uint16_t *restrict Cx = C->x ; + const uint16_t *restrict Ax = A->x ; + const uint16_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint16_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint16_t aik = Ax [pa] ; + uint16_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_uint16 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint16_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint16_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint16_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint16_t *Cx = C->x ; + const uint16_t *Ax = A->x ; + const uint16_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint16_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint16_t aki = Ax [pa + k] ; // aki = A(k,i) + uint16_t bkj = Bx [pb] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint16_t aki = Ax [pa] ; // aki = A(k,i) + uint16_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint16_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_times_uint32.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint32.c new file mode 100644 index 0000000000..f2198d9e37 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint32.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_uint32: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_uint32 +// A'*B function: GB_AdotB__times_times_uint32 +// Z type : uint32_t (the type of C) +// XY type: uint32_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
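+
+ // [editorial sketch -- not part of the generated template] In the
+ // dot-product method GB_AdotB__times_times_uint32 later in this file, the
+ // two skewed cases (ainz > 32*bjnz and bjnz > 32*ainz) step through the
+ // denser index list by binary search instead of one entry at a time.  The
+ // exact contract of GB_BINARY_TRIM_SEARCH is defined in the library
+ // headers; the helper below is only an illustrative lower-bound search
+ // with the same intent.  In the first skewed case it would be used as
+ // pa = sketch_lower_bound (ib, Ai, pa + 1, pa_end) when Ai [pa] < ib.
+
+ static int64_t sketch_lower_bound               // smallest p in [lo,hi)
+ (                                               // with X [p] >= target,
+     int64_t target, const int64_t *X,           // or hi if there is none
+     int64_t lo, int64_t hi
+ )
+ {
+     while (lo < hi)
+     {
+         int64_t mid = lo + (hi - lo) / 2 ;
+         if (X [mid] < target) lo = mid + 1 ; else hi = mid ;
+     }
+     return (lo) ;
+ }
+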
+ uint32_t *restrict w = GB_thread_local.Work ; + + uint32_t *restrict Cx = C->x ; + const uint32_t *restrict Ax = A->x ; + const uint32_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint32_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint32_t aik = Ax [pa] ; + uint32_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_uint32 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint32_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint32_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint32_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint32_t *Cx = C->x ; + const uint32_t *Ax = A->x ; + const uint32_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint32_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint32_t aki = Ax [pa + k] ; // aki = A(k,i) + uint32_t bkj = Bx [pb] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint32_t aki = Ax [pa] ; // aki = A(k,i) + uint32_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint32_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_times_uint64.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint64.c new file mode 100644 index 0000000000..26b4d15c88 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint64.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_uint64: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be editted. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_uint64 +// A'*B function: GB_AdotB__times_times_uint64 +// Z type : uint64_t (the type of C) +// XY type: uint64_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. 
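+
+ // [editorial sketch -- not part of the generated template] In the
+ // WITH_ZOMBIES variant below, C keeps the full pattern of the Mask: mask
+ // positions that receive no product are stored as "zombies", with the row
+ // index flipped and C->nzombies incremented, so they can be pruned or
+ // revived later without reallocating C.  FLIP itself is defined in GB.h;
+ // the encoding below is an assumed illustration of the idea only -- any
+ // involution that sends valid indices (>= 0) to values that cannot be
+ // valid indices would serve.
+
+ static int64_t sketch_flip (int64_t i)          // hypothetical encoding
+ {
+     // maps i >= 0 to a value <= -2 and is its own inverse, so
+     // sketch_flip (sketch_flip (i)) == i and a zombie can be revived.
+     return (-i - 2) ;
+ }
+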
+ uint64_t *restrict w = GB_thread_local.Work ; + + uint64_t *restrict Cx = C->x ; + const uint64_t *restrict Ax = A->x ; + const uint64_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // 
place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint64_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint64_t aik = Ax [pa] ; + uint64_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_uint64 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint64_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint64_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint64_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint64_t *Cx = C->x ; + const uint64_t *Ax = A->x ; + const uint64_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint64_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, 
pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint64_t aki = Ax [pa + k] ; // aki = A(k,i) + uint64_t bkj = Bx [pb] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint64_t aki = Ax [pa] ; // aki = A(k,i) + uint64_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint64_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + 
Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Generated/GB_AxB__times_times_uint8.c b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint8.c new file mode 100644 index 0000000000..248769be76 --- /dev/null +++ b/GraphBLAS/Source/Generated/GB_AxB__times_times_uint8.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB__times_times_uint8: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead. + +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB__times_times_uint8 +// A'*B function: GB_AdotB__times_times_uint8 +// Z type : uint8_t (the type of C) +// XY type: uint8_t (the type of A and B) +// Identity: 1 (where cij *= 1 does not change cij) +// Multiply: t = (aik * bkj) +// Add: cij *= t + +//------------------------------------------------------------------------------ +// C=A*B and C<Mask>=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB__times_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized.
+ uint8_t *restrict w = GB_thread_local.Work ; + + uint8_t *restrict Cx = C->x ; + const uint8_t *restrict Ax = A->x ; + const uint8_t *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + w [i] *= t ; + } + } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = 1 ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C 
in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = 1 ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + uint8_t bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + uint8_t aik = Ax [pa] ; + uint8_t t = aik * bkj ; + w [i] *= t ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB__times_times_uint8 +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + uint8_t aki = Ax [pa++] ; /* aki = A(k,i) */ \ + uint8_t bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + uint8_t t = aki * bkj ; \ + if (cij_exists) \ + { \ + /* cij += A(k,i) * B(k,j) */ \ + cij *= t ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + uint8_t *Cx = C->x ; + const uint8_t *Ax = A->x ; + const uint8_t *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + uint8_t cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, 
&pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for (int64_t k = 0 ; k < nrows ; k++) + { + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + uint8_t aki = Ax [pa + k] ; // aki = A(k,i) + uint8_t bkj = Bx [pb] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij *= t ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = 1 ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + uint8_t aki = Ax [pa] ; // aki = A(k,i) + uint8_t bkj = Bx [pb + k] ; // bkj = B(k,j) + uint8_t t = aki * bkj ; + cij *= t ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef 
MERGE + +#endif diff --git a/GraphBLAS/Source/GrB_BinaryOp_free.c b/GraphBLAS/Source/GrB_BinaryOp_free.c index 1cb0372340..2a3bf72778 100644 --- a/GraphBLAS/Source/GrB_BinaryOp_free.c +++ b/GraphBLAS/Source/GrB_BinaryOp_free.c @@ -24,7 +24,7 @@ GrB_Info GrB_BinaryOp_free // free a user-created binary operator if (op->magic == MAGIC) { op->magic = FREED ; // to help detect dangling pointers - GB_FREE_MEMORY (*binaryop) ; + GB_FREE_MEMORY (*binaryop, 1, sizeof (GB_BinaryOp_opaque)) ; } (*binaryop) = NULL ; } diff --git a/GraphBLAS/Source/GrB_Descriptor_free.c b/GraphBLAS/Source/GrB_Descriptor_free.c index c49a7d18e9..3412534fb7 100644 --- a/GraphBLAS/Source/GrB_Descriptor_free.c +++ b/GraphBLAS/Source/GrB_Descriptor_free.c @@ -21,7 +21,7 @@ GrB_Info GrB_Descriptor_free // free a descriptor if (desc != NULL && desc->magic == MAGIC) { desc->magic = FREED ; // to help detect dangling pointers - GB_FREE_MEMORY (*descriptor) ; + GB_FREE_MEMORY (*descriptor, 1, sizeof (GB_Descriptor_opaque)) ; } (*descriptor) = NULL ; } diff --git a/GraphBLAS/Source/GrB_Matrix_reduce_scalar.c b/GraphBLAS/Source/GrB_Matrix_reduce_scalar.c index 024fec099c..c234e164f7 100644 --- a/GraphBLAS/Source/GrB_Matrix_reduce_scalar.c +++ b/GraphBLAS/Source/GrB_Matrix_reduce_scalar.c @@ -65,7 +65,6 @@ GrB_Info GrB_Matrix_reduce_UDT // c = accum (c, reduce_to_scalar (A)) WHERE ("GrB_Matrix_reduce_UDT (&c, accum, reduce, A, desc)") ; RETURN_IF_NULL_OR_UNINITIALIZED (A) ; - APPLY_PENDING_UPDATES (A) ; // do this early (see spec) RETURN_IF_NULL_OR_UNINITIALIZED (reduce) ; return (GB_reduce_to_scalar (c, reduce->op->ztype, accum, reduce, A)) ; } diff --git a/GraphBLAS/Source/GrB_Monoid_free.c b/GraphBLAS/Source/GrB_Monoid_free.c index 3ce86664e1..1498659552 100644 --- a/GraphBLAS/Source/GrB_Monoid_free.c +++ b/GraphBLAS/Source/GrB_Monoid_free.c @@ -25,8 +25,8 @@ GrB_Info GrB_Monoid_free // free a user-created monoid // only user-defined monoids are freed. predefined monoids // are statically allocated and cannot be freed. mon->magic = FREED ; // to help detect dangling pointers - GB_FREE_MEMORY (mon->identity) ; - GB_FREE_MEMORY (*monoid) ; + GB_FREE_MEMORY (mon->identity, 1, mon->op->ztype->size) ; + GB_FREE_MEMORY (*monoid, 1, sizeof (GB_Monoid_opaque)) ; } (*monoid) = NULL ; } diff --git a/GraphBLAS/Source/GrB_Semiring_free.c b/GraphBLAS/Source/GrB_Semiring_free.c index a65f3c9619..4963a70ae4 100644 --- a/GraphBLAS/Source/GrB_Semiring_free.c +++ b/GraphBLAS/Source/GrB_Semiring_free.c @@ -25,7 +25,7 @@ GrB_Info GrB_Semiring_free // free a user-created semiring // only user-defined semirings are freed. predefined semirings // are statically allocated and cannot be freed. 
s->magic = FREED ; // to help detect dangling pointers - GB_FREE_MEMORY (*semiring) ; + GB_FREE_MEMORY (*semiring, 1, sizeof (GB_Semiring_opaque)) ; } (*semiring) = NULL ; } diff --git a/GraphBLAS/Source/GrB_Type_free.c b/GraphBLAS/Source/GrB_Type_free.c index 080876610f..d477de520a 100644 --- a/GraphBLAS/Source/GrB_Type_free.c +++ b/GraphBLAS/Source/GrB_Type_free.c @@ -24,7 +24,7 @@ GrB_Info GrB_Type_free // free a user-defined type if (t->magic == MAGIC) { t->magic = FREED ; // to help detect dangling pointers - GB_FREE_MEMORY (*type) ; + GB_FREE_MEMORY (*type, 1, sizeof (GB_Type_opaque)) ; } (*type) = NULL ; } diff --git a/GraphBLAS/Source/GrB_UnaryOp_free.c b/GraphBLAS/Source/GrB_UnaryOp_free.c index 7ddf7edddf..a79e1004b9 100644 --- a/GraphBLAS/Source/GrB_UnaryOp_free.c +++ b/GraphBLAS/Source/GrB_UnaryOp_free.c @@ -24,7 +24,7 @@ GrB_Info GrB_UnaryOp_free // free a user-created unary operator if (op->magic == MAGIC) { op->magic = FREED ; // to help detect dangling pointers - GB_FREE_MEMORY (*unaryop) ; + GB_FREE_MEMORY (*unaryop, 1, sizeof (GB_UnaryOp_opaque)) ; } (*unaryop) = NULL ; } diff --git a/GraphBLAS/Source/GrB_Vector_reduce_scalar.c b/GraphBLAS/Source/GrB_Vector_reduce_scalar.c index b4e7c1e035..4496aea301 100644 --- a/GraphBLAS/Source/GrB_Vector_reduce_scalar.c +++ b/GraphBLAS/Source/GrB_Vector_reduce_scalar.c @@ -60,7 +60,6 @@ GrB_Info GrB_Vector_reduce_UDT // c = accum (c, reduce_to_scalar (u)) // See comments on GrB_Matrix_reduce_UDT WHERE ("GrB_Vector_reduce_UDT (&c, accum, reduce, u, desc)") ; RETURN_IF_NULL_OR_UNINITIALIZED (u) ; - APPLY_PENDING_UPDATES (u) ; // do this early (see spec) RETURN_IF_NULL_OR_UNINITIALIZED (reduce) ; return (GB_reduce_to_scalar (c, reduce->op->ztype, accum, reduce, (GrB_Matrix) u)) ; diff --git a/GraphBLAS/Source/GrB_init.c b/GraphBLAS/Source/GrB_init.c index 4d2c0d4aa1..3d960698d4 100644 --- a/GraphBLAS/Source/GrB_init.c +++ b/GraphBLAS/Source/GrB_init.c @@ -65,16 +65,35 @@ _Thread_local GB_thread_local_struct GB_thread_local = .Flag = NULL, // initialized space .Flag_size = 0, // size of Flag array - // malloc tracking, for testing and debugging only - .nmalloc = 0, // memory block counter - .malloc_debug = false, // do not test memory handling - .malloc_debug_count = 0, // counter for testing memory handling - // random seed for each thread .seed = 1 } ; +//------------------------------------------------------------------------------ +// All Global storage is declared and initialized here +//------------------------------------------------------------------------------ + +// If the user creates threads that work on GraphBLAS matrices, then all of +// those threads must share the same matrix queue, and the same mode. 
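
Every GB_FREE_MEMORY call in the hunks above now passes an item count and an item size, and the GB_Global struct introduced just below carries the counters (nmalloc, inuse, maxused) that those sizes feed. The following is a minimal, self-contained sketch of that bookkeeping under assumed names (DEMO_* macros and demo_* counters are invented for illustration; the real GB_MALLOC_MEMORY/GB_FREE_MEMORY macros live in GB.h and are not shown in this patch):

#include <stdio.h>
#include <stdlib.h>

static int    demo_nmalloc = 0 ;    /* stand-in for GB_Global.nmalloc */
static size_t demo_inuse   = 0 ;    /* stand-in for GB_Global.inuse   */

#define DEMO_MALLOC_MEMORY(p,nitems,size_of_item)                       \
{                                                                       \
    (p) = malloc ((nitems) * (size_of_item)) ;                          \
    if ((p) != NULL)                                                    \
    {                                                                   \
        demo_nmalloc++ ;                         /* one more live block */ \
        demo_inuse += (size_t) (nitems) * (size_of_item) ;              \
    }                                                                   \
}

#define DEMO_FREE_MEMORY(p,nitems,size_of_item)                         \
{                                                                       \
    if ((p) != NULL)                                                    \
    {                                                                   \
        free (p) ;                                                      \
        demo_nmalloc-- ;                         /* one less live block */ \
        demo_inuse -= (size_t) (nitems) * (size_of_item) ;              \
        (p) = NULL ;                                                    \
    }                                                                   \
}

int main (void)
{
    double *x = NULL ;
    DEMO_MALLOC_MEMORY (x, 100, sizeof (double)) ;
    DEMO_FREE_MEMORY   (x, 100, sizeof (double)) ;
    printf ("blocks %d, bytes in use %zu\n", demo_nmalloc, demo_inuse) ;
    return (0) ;
}

Under such a scheme, a call like GB_FREE_MEMORY (*semiring, 1, sizeof (GB_Semiring_opaque)) can subtract exactly the bytes that were charged when the object was allocated, which is what the new GxB_stats below reports back to the user.
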
+ +GB_Global_struct GB_Global = +{ + + // queued matrices with work to do + .queue_head = NULL, // pointer to first queued matrix + + // GraphBLAS mode + .mode = GrB_NONBLOCKING, // default is nonblocking + + // malloc tracking, for testing, statistics, and debugging only + .nmalloc = 0, // memory block counter + .malloc_debug = false, // do not test memory handling + .malloc_debug_count = 0, // counter for testing memory handling + .inuse = 0, // memory space current in use + .maxused = 0 // high water memory usage +} ; + //------------------------------------------------------------------------------ // GrB_init //------------------------------------------------------------------------------ @@ -113,12 +132,24 @@ GrB_Info GrB_init // start up GraphBLAS GB_thread_local.report [0] = '\0' ; // queue of matrices for nonblocking mode and set the mode - GB_queue_init (mode) ; + #pragma omp critical (GB_queue) + { + // clear the queue + GB_Global.queue_head = NULL ; + + // set the mode: blocking or nonblocking + GB_Global.mode = mode ; // default is non-blocking + } // malloc tracking - GB_thread_local.nmalloc = 0 ; - GB_thread_local.malloc_debug = false ; - GB_thread_local.malloc_debug_count = 0 ; + #pragma omp critical (GB_memory) + { + GB_Global.nmalloc = 0 ; + GB_Global.malloc_debug = false ; + GB_Global.malloc_debug_count = 0 ; + GB_Global.inuse = 0 ; + GB_Global.maxused = 0 ; + } // workspace GB_thread_local.Mark = NULL ; // initialized space diff --git a/GraphBLAS/Source/GxB_SelectOp_free.c b/GraphBLAS/Source/GxB_SelectOp_free.c index d46363b8d7..cb1dcdcd77 100644 --- a/GraphBLAS/Source/GxB_SelectOp_free.c +++ b/GraphBLAS/Source/GxB_SelectOp_free.c @@ -24,7 +24,7 @@ GrB_Info GxB_SelectOp_free // free a user-created select operator if (op->magic == MAGIC) { op->magic = FREED ; // to help detect dangling pointers - GB_FREE_MEMORY (*selectop) ; + GB_FREE_MEMORY (*selectop, 1, sizeof (GB_SelectOp_opaque)) ; } (*selectop) = NULL ; } diff --git a/GraphBLAS/Source/GxB_stats.c b/GraphBLAS/Source/GxB_stats.c new file mode 100644 index 0000000000..0e9c499ffc --- /dev/null +++ b/GraphBLAS/Source/GxB_stats.c @@ -0,0 +1,52 @@ +//------------------------------------------------------------------------------ +// GxB_stats: return memory usage and other statistics +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
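
The GxB_stats implementation that follows returns the memory counters kept in GB_Global. As a hedged usage sketch (not part of the patch): GxB_Statistics, GxB_stats, and the field names nmalloc/inuse/maxused come from this version of the library, but their exact integer types are not shown here, so the values are cast to double for printing.

#include "GraphBLAS.h"
#include <stdio.h>

int main (void)
{
    GrB_init (GrB_NONBLOCKING) ;
    GxB_Statistics stats ;
    if (GxB_stats (&stats) == GrB_SUCCESS)
    {
        // nmalloc: live malloc'd blocks; inuse: bytes currently in use;
        // maxused: high-water mark, reset to inuse by each call
        printf ("blocks %.0f, bytes in use %.0f, peak bytes %.0f\n",
            (double) stats.nmalloc, (double) stats.inuse,
            (double) stats.maxused) ;
    }
    GrB_finalize ( ) ;
    return (0) ;
}
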
+ +//------------------------------------------------------------------------------ + +#include "GB.h" + +GrB_Info GxB_stats +( + GxB_Statistics *stats +) +{ + + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + WHERE ("GxB_stats (&stats) ;") ; + RETURN_IF_NULL (stats) ; + + //-------------------------------------------------------------------------- + // get memory usage + //-------------------------------------------------------------------------- + + #pragma omp critical (GB_memory) + { + stats->nmalloc = GB_Global.nmalloc ; + stats->inuse = GB_Global.inuse ; + stats->maxused = GB_Global.maxused ; + GB_Global.maxused = GB_Global.inuse ; + } + + //-------------------------------------------------------------------------- + // clear remainder of stats + //-------------------------------------------------------------------------- + + // these components are reserved for future use, so that new statistics can + // be added without requiring a prior user application to be recompiled. + + for (int i = 0 ; i < 20 ; i++) + { + stats->future [i] = 0 ; + stats->xfuture [i] = 0 ; + } + + return (REPORT_SUCCESS) ; +} + diff --git a/GraphBLAS/Source/README.txt b/GraphBLAS/Source/README.txt index 4018924d42..2f08367860 100644 --- a/GraphBLAS/Source/README.txt +++ b/GraphBLAS/Source/README.txt @@ -9,8 +9,11 @@ end-user applications. The Template/* files are not compiled separately, but are #include'd into files in this folder instead. +The Generated/* files are created by the axb*.m scripts from Template/GB_AxB.* +and should not be edited. + If you don't have the "make" command, try this, or its equivalent, to compile all of GraphBLAS: - cc -I../Include -ITemplate *.c + cc -I../Include -ITemplate -IGenerated *.c Generated/*.c diff --git a/GraphBLAS/Source/Template/GB_AxB.c b/GraphBLAS/Source/Template/GB_AxB.c new file mode 100644 index 0000000000..7fca430646 --- /dev/null +++ b/GraphBLAS/Source/Template/GB_AxB.c @@ -0,0 +1,513 @@ +//------------------------------------------------------------------------------ +// GB_AxB: hard-coded C=A*B +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +// If this filename has a double underscore in its name ("__") then it has +// been automatically constructed from Template/GB*AxB.[ch], via the axb*.m +// scripts, and should not be edited. Edit the original source file instead.
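
The template below is written against placeholder tokens (ztype, xytype, MULT, ADD, identity). Instantiated for the times-times semiring earlier in this patch, ADD(cij,t) becomes cij *= t with identity 1, so the dot-product worker accumulates a running product. A small standalone illustration of that arithmetic (not GraphBLAS code, values chosen arbitrarily):

#include <stdio.h>
#include <stdint.h>

int main (void)
{
    // dense dot product under the (times,times) semiring:
    // cij starts at the identity 1, and each term multiplies into it
    uint64_t a [3] = { 2, 3, 5 } ;      // A(:,i)
    uint64_t b [3] = { 7, 1, 2 } ;      // B(:,j)
    uint64_t cij = 1 ;
    for (int k = 0 ; k < 3 ; k++)
    {
        uint64_t t = a [k] * b [k] ;    // MULT(aki,bkj)
        cij *= t ;                      // ADD(cij,t)
    }
    printf ("cij = %llu\n", (unsigned long long) cij) ;    // prints 420
    return (0) ;
}
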
+ +//------------------------------------------------------------------------------ + +#include "GB.h" +#ifndef GBCOMPACT +#include "GB_AxB_methods.h" + +// The C=A*B semiring is defined by the following types and operators: + +// A*B function: GB_AxB +// A'*B function: GB_AdotB +// Z type : ztype (the type of C) +// XY type: xytype (the type of A and B) +// Identity: identity (where ADD(cij,identity) does not change cij) +// Multiply: t = (MULT(aik, bkj)) +// Add: ADD(cij, t) + +//------------------------------------------------------------------------------ +// C=A*B and C=A*B: outer product +//------------------------------------------------------------------------------ + +void GB_AxB +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, and C + //-------------------------------------------------------------------------- + + // w has size C->nrows == A->nrows, each entry size zsize. uninitialized. + ztype *restrict w = GB_thread_local.Work ; + + ztype *restrict Cx = C->x ; + const xytype *restrict Ax = A->x ; + const xytype *restrict Bx = B->x ; + + const int64_t n = C->ncols ; + const int64_t *restrict Ap = A->p ; + const int64_t *restrict Ai = A->i ; + const int64_t *restrict Bp = B->p ; + const int64_t *restrict Bi = B->i ; + + if (Mask != NULL) + { + + //---------------------------------------------------------------------- + // C = A*B where Mask is pattern of C, with zombies + //---------------------------------------------------------------------- + + // get the Flag workspace (already allocated and cleared) + int8_t *restrict Flag = GB_thread_local.Flag ; + + // get the mask + const int64_t *restrict Maskp = Mask->p ; + const int64_t *restrict Maski = Mask->i ; + const void *restrict Maskx = Mask->x ; + GB_cast_function cast_Mask = + GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + size_t msize = Mask->type->size ; + + #ifdef WITH_ZOMBIES + // copy Maskp into C->p + memcpy (C->p, Maskp, (n+1) * sizeof (int64_t)) ; + C->magic = MAGIC ; + #else + int64_t cnz = 0 ; + int64_t *restrict Cp = C->p ; + #endif + + int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //------------------------------------------------------------------ + // compute C(;,j) = A * B(:,j), both values and pattern + //------------------------------------------------------------------ + + // skip this column j if the Mask is empty + #ifndef WITH_ZOMBIES + Cp [j] = cnz ; + #endif + int64_t mlo, mhi ; + if (empty (Maskp, Maski, j, &mlo, &mhi)) continue ; + bool marked = false ; + + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + // skip A(:,k) if empty or if entries out of range of Mask + int64_t alo, ahi ; + if (empty (Ap, Ai, k, &alo, &ahi)) continue ; + if (ahi < mlo || alo > mhi) continue ; + // scatter Mask(:,j) into Flag if not yet done + scatter_mask (j, Maskp, Maski, Maskx, msize, cast_Mask, Flag, + &marked) ; + xytype bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += (A(i,k) * B(k,j)) .* Mask(i,j) + int64_t i = Ai [pa] ; + int8_t flag = Flag [i] ; + if (flag == 0) continue ; + // Mask(i,j) == 1 so do the work + xytype aik = Ax [pa] ; + ztype t = MULT(aik, bkj) ; + if (flag > 0) + { + // first time C(i,j) seen + Flag [i] = -1 ; + w [i] = t ; + } + else + { + // C(i,j) seen before, update it + ADD(w [i], t) ; + } 
+ } + } + + #ifdef WITH_ZOMBIES + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [p] = w [i] ; + Ci [p] = i ; + } + else + { + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = identity ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + Flag [i] = 0 ; + } + } + else + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is a zombie; in the Mask but not in A*B + Cx [p] = identity ; + Ci [p] = FLIP (i) ; + C->nzombies++ ; + } + } + + #else + + // gather C(:,j), both values and pattern, from the Mask(:,j) + if (marked) + { + for (int64_t p = Maskp [j] ; p < Maskp [j+1] ; p++) + { + int64_t i = Maski [p] ; + // C(i,j) is present + if (Flag [i] < 0) + { + // C(i,j) is a live entry, gather its row and value + Cx [cnz] = w [i] ; + Ci [cnz++] = i ; + } + Flag [i] = 0 ; + } + } + + #endif + + } + + #ifdef WITH_ZOMBIES + // place C in the queue if it has zombies + GB_queue_insert (C) ; + #else + Cp [n] = cnz ; + #endif + + } + else + { + + //---------------------------------------------------------------------- + // C = A*B with pattern of C computed by GB_AxB_symbolic + //---------------------------------------------------------------------- + + const int64_t *restrict Cp = C->p ; + const int64_t *restrict Ci = C->i ; + + for (int64_t j = 0 ; j < n ; j++) + { + // clear w + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + w [Ci [p]] = identity ; + } + // compute C(;,j) + for (int64_t p = Bp [j] ; p < Bp [j+1] ; p++) + { + // B(k,j) is present + int64_t k = Bi [p] ; + xytype bkj = Bx [p] ; + for (int64_t pa = Ap [k] ; pa < Ap [k+1] ; pa++) + { + // w [i] += A(i,k) * B(k,j) + int64_t i = Ai [pa] ; + xytype aik = Ax [pa] ; + ztype t = MULT(aik, bkj) ; + ADD(w [i], t) ; + } + } + // gather C(:,j) + for (int64_t p = Cp [j] ; p < Cp [j+1] ; p++) + { + Cx [p] = w [Ci [p]] ; + } + } + } +} + + +//------------------------------------------------------------------------------ +// C=A'*B: dot product +//------------------------------------------------------------------------------ + +void GB_AdotB +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip // if true, A and B have been swapped +) +{ + + //-------------------------------------------------------------------------- + // get A, B, C, and Mask + //-------------------------------------------------------------------------- + + const int64_t *Ai = A->i ; + const int64_t *Bi = B->i ; + const int64_t *Ap = A->p ; + const int64_t *Bp = B->p ; + int64_t *Ci = C->i ; + int64_t *Cp = C->p ; + int64_t n = B->ncols ; + int64_t m = A->ncols ; + int64_t nrows = B->nrows ; + ASSERT (C->ncols == n) ; + ASSERT (C->nrows == m) ; + + int64_t cnz = 0 ; + + const int64_t *Maskp = NULL ; + const int64_t *Maski = NULL ; + const void *Maskx = NULL ; + GB_cast_function cast_Mask = NULL ; + size_t msize = 0 ; + + if (Mask != NULL) + { + Maskp = Mask->p ; + Maski = Mask->i ; + Maskx = Mask->x ; + msize = Mask->type->size ; + // get the function pointer for casting Mask(i,j) from its current + // type into boolean + cast_Mask = GB_cast_factory (GB_BOOL_code, Mask->type->code) ; + } + + #define MERGE \ + { \ + xytype aki = Ax [pa++] ; /* aki = A(k,i) */ \ + xytype bkj = Bx [pb++] ; /* bjk = B(k,j) */ \ + ztype t = MULT(aki, bkj) ; \ + if (cij_exists) \ + { \ + 
/* cij += A(k,i) * B(k,j) */ \ + ADD(cij, t) ; \ + } \ + else \ + { \ + /* cij = A(k,i) * B(k,j) */ \ + cij_exists = true ; \ + cij = t ; \ + } \ + } + + ztype *Cx = C->x ; + const xytype *Ax = A->x ; + const xytype *Bx = B->x ; + + for (int64_t j = 0 ; j < n ; j++) + { + + //---------------------------------------------------------------------- + // C(:,j) = A'*B(:,j) + //---------------------------------------------------------------------- + + int64_t pb_start, pb_end, bjnz, ib_first, ib_last, kk1, kk2 ; + if (!jinit (Cp, j, cnz, Bp, Bi, Maskp, m, &pb_start, &pb_end, + &bjnz, &ib_first, &ib_last, &kk1, &kk2)) continue ; + + for (int64_t kk = kk1 ; kk < kk2 ; kk++) + { + + //------------------------------------------------------------------ + // compute cij = A(:,i)' * B(:,j), using the semiring + //------------------------------------------------------------------ + + ztype cij ; + bool cij_exists = false ; // C(i,j) not yet in the pattern + int64_t i, pa, pa_end, pb, ainz ; + if (!cij_init (kk, Maski, Maskx, cast_Mask, msize, + Ap, Ai, ib_first, ib_last, pb_start, + &i, &pa, &pa_end, &pb, &ainz)) continue ; + + // B(:,j) and A(:,i) both have at least one entry + + if (bjnz == nrows && ainz == nrows) + { + + //-------------------------------------------------------------- + // both A(:,i) and B(:,j) are dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = identity ; + for (int64_t k = 0 ; k < nrows ; k++) + { + xytype aki = Ax [pa + k] ; // aki = A(k,i) + xytype bkj = Bx [pb + k] ; // bkj = B(k,j) + ztype t = MULT(aki, bkj) ; + ADD(cij, t) ; + } + + } + else if (ainz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is dense and B(:,j) is sparse + //-------------------------------------------------------------- + + cij_exists = true ; + cij = identity ; + for ( ; pb < pb_end ; pb++) + { + int64_t k = Bi [pb] ; + xytype aki = Ax [pa + k] ; // aki = A(k,i) + xytype bkj = Bx [pb] ; // bkj = B(k,j) + ztype t = MULT(aki, bkj) ; + ADD(cij, t) ; + } + + } + else if (bjnz == nrows) + { + + //-------------------------------------------------------------- + // A(:,i) is sparse and B(:,j) is dense + //-------------------------------------------------------------- + + cij_exists = true ; + cij = identity ; + for ( ; pa < pa_end ; pa++) + { + int64_t k = Ai [pa] ; + xytype aki = Ax [pa] ; // aki = A(k,i) + xytype bkj = Bx [pb + k] ; // bkj = B(k,j) + ztype t = MULT(aki, bkj) ; + ADD(cij, t) ; + } + + } + else if (ainz > 32 * bjnz) + { + + //-------------------------------------------------------------- + // B(:,j) is very sparse compared to A(:,i) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + // discard all entries A(ia:ib-1,i) + int64_t pleft = pa + 1 ; + int64_t pright = pa_end ; + GB_BINARY_TRIM_SEARCH (ib, Ai, pleft, pright) ; + ASSERT (pleft > pa) ; + pa = pleft ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else if (bjnz > 32 * ainz) + { + + //-------------------------------------------------------------- + // A(:,i) is very sparse compared to B(:,j) + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t 
ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + // discard all entries B(ib:ia-1,j) + int64_t pleft = pb + 1 ; + int64_t pright = pb_end ; + GB_BINARY_TRIM_SEARCH (ia, Bi, pleft, pright) ; + ASSERT (pleft > pb) ; + pb = pleft ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + + } + else + { + + //-------------------------------------------------------------- + // A(:,i) and B(:,j) have about the same sparsity + //-------------------------------------------------------------- + + while (pa < pa_end && pb < pb_end) + { + int64_t ia = Ai [pa] ; + int64_t ib = Bi [pb] ; + if (ia < ib) + { + // A(ia,i) appears before B(ib,j) + pa++ ; + } + else if (ib < ia) + { + // B(ib,j) appears before A(ia,i) + pb++ ; + } + else // ia == ib == k + { + // A(k,i) and B(k,j) are the next entries to merge + MERGE ; + } + } + } + + if (cij_exists) + { + // C(i,j) = cij + Cx [cnz] = cij ; + Ci [cnz++] = i ; + } + } + } + // log the end of the last column + Cp [n] = cnz ; +} + +#undef MERGE + +#endif diff --git a/GraphBLAS/Source/Template/GB_AxB.h b/GraphBLAS/Source/Template/GB_AxB.h new file mode 100644 index 0000000000..91d85de70e --- /dev/null +++ b/GraphBLAS/Source/Template/GB_AxB.h @@ -0,0 +1,18 @@ +void GB_AxB +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + +void GB_AdotB +( + GrB_Matrix C, + const GrB_Matrix Mask, + const GrB_Matrix A, + const GrB_Matrix B, + bool flip +) ; + diff --git a/GraphBLAS/Source/Template/GB_AxB_compare_template.c b/GraphBLAS/Source/Template/GB_AxB_compare_template.c index d6fc4b809c..c66859a8d6 100644 --- a/GraphBLAS/Source/Template/GB_AxB_compare_template.c +++ b/GraphBLAS/Source/Template/GB_AxB_compare_template.c @@ -7,7 +7,7 @@ //------------------------------------------------------------------------------ -// A template file #include'd in GB_AxB_builtin.c +// A template file #include'd in GB_AxB_factory.c // The multiply operator is a comparator: EQ, NE, GT, LT, GE, LE. // z=f(x,y): x and x are either boolean or non-boolean. z is boolean. @@ -19,9 +19,9 @@ // There is one special case to consider. For boolean x, y, and z, the // function z=NE(x,y) is the same as z=XOR(x,y). If z is boolean, the multiply -// operator NE has already been renamed XOR by GB_AxB_builtin, and thus NE will -// never use the boolean case, below. Thus it is removed with the #ifndef -// NO_BOOLEAN. +// operator NE has already been renamed XOR by GB_AxB_semiring_builtin, and +// thus NE will never use the boolean case, below. Thus it is removed with the +// #ifndef NO_BOOLEAN. 
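
The comment above relies on the identity that, for boolean x and y, NE(x,y) is the same operator as XOR(x,y), which is why GB_AxB_semiring_builtin renames it and the boolean case is compiled out under NO_BOOLEAN. A minimal standalone check of that identity (not part of the patch):

#include <assert.h>
#include <stdbool.h>

int main (void)
{
    // all four boolean combinations: (x != y) and (x ^ y) agree
    for (int x = 0 ; x <= 1 ; x++)
    {
        for (int y = 0 ; y <= 1 ; y++)
        {
            bool ne   = (x != y) ;
            bool lxor = ((x ^ y) != 0) ;
            assert (ne == lxor) ;
        }
    }
    return (0) ;
}
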
ASSERT (zcode == GB_BOOL_code) ; { @@ -35,122 +35,92 @@ ASSERT (zcode == GB_BOOL_code) ; case GB_LOR_opcode : - #define ADD(w,t) w = (w || t) switch (xycode) { - // xycode ztype xytype identity - #define MULT(x,y) IMULT(x,y) #ifndef NO_BOOLEAN - case GB_BOOL_code : AxB (bool , bool , false) ; + case GB_BOOL_code : AxB (_lor, mult, _bool ) #endif - case GB_INT8_code : AxB (bool , int8_t , false) ; - case GB_UINT8_code : AxB (bool , uint8_t , false) ; - case GB_INT16_code : AxB (bool , int16_t , false) ; - case GB_UINT16_code : AxB (bool , uint16_t, false) ; - case GB_INT32_code : AxB (bool , int32_t , false) ; - case GB_UINT32_code : AxB (bool , uint32_t, false) ; - case GB_INT64_code : AxB (bool , int64_t , false) ; - case GB_UINT64_code : AxB (bool , uint64_t, false) ; - #undef MULT - #define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (bool , float , false) ; - case GB_FP64_code : AxB (bool , double , false) ; - #undef MULT + case GB_INT8_code : AxB (_lor, mult, _int8 ) + case GB_UINT8_code : AxB (_lor, mult, _uint8 ) + case GB_INT16_code : AxB (_lor, mult, _int16 ) + case GB_UINT16_code : AxB (_lor, mult, _uint16) + case GB_INT32_code : AxB (_lor, mult, _int32 ) + case GB_UINT32_code : AxB (_lor, mult, _uint32) + case GB_INT64_code : AxB (_lor, mult, _int64 ) + case GB_UINT64_code : AxB (_lor, mult, _uint64) + case GB_FP32_code : AxB (_lor, mult, _fp32 ) + case GB_FP64_code : AxB (_lor, mult, _fp64 ) default: ; } break ; - #undef ADD case GB_LAND_opcode : - #define ADD(w,t) w = (w && t) switch (xycode) { - // xycode ztype xytype identity - #define MULT(x,y) IMULT(x,y) #ifndef NO_BOOLEAN - case GB_BOOL_code : AxB (bool , bool , true) ; + case GB_BOOL_code : AxB (_land, mult, _bool ) #endif - case GB_INT8_code : AxB (bool , int8_t , true) ; - case GB_UINT8_code : AxB (bool , uint8_t , true) ; - case GB_INT16_code : AxB (bool , int16_t , true) ; - case GB_UINT16_code : AxB (bool , uint16_t, true) ; - case GB_INT32_code : AxB (bool , int32_t , true) ; - case GB_UINT32_code : AxB (bool , uint32_t, true) ; - case GB_INT64_code : AxB (bool , int64_t , true) ; - case GB_UINT64_code : AxB (bool , uint64_t, true) ; - #undef MULT - #define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (bool , float , true) ; - case GB_FP64_code : AxB (bool , double , true) ; - #undef MULT + case GB_INT8_code : AxB (_land, mult, _int8 ) + case GB_UINT8_code : AxB (_land, mult, _uint8 ) + case GB_INT16_code : AxB (_land, mult, _int16 ) + case GB_UINT16_code : AxB (_land, mult, _uint16) + case GB_INT32_code : AxB (_land, mult, _int32 ) + case GB_UINT32_code : AxB (_land, mult, _uint32) + case GB_INT64_code : AxB (_land, mult, _int64 ) + case GB_UINT64_code : AxB (_land, mult, _uint64) + case GB_FP32_code : AxB (_land, mult, _fp32 ) + case GB_FP64_code : AxB (_land, mult, _fp64 ) default: ; } break ; - #undef ADD case GB_LXOR_opcode : - #define ADD(w,t) w = (w != t) switch (xycode) { - // xycode ztype xytype identity - #define MULT(x,y) IMULT(x,y) #ifndef NO_BOOLEAN - case GB_BOOL_code : AxB (bool , bool , false) ; + case GB_BOOL_code : AxB (_lxor, mult, _bool ) #endif - case GB_INT8_code : AxB (bool , int8_t , false) ; - case GB_UINT8_code : AxB (bool , uint8_t , false) ; - case GB_INT16_code : AxB (bool , int16_t , false) ; - case GB_UINT16_code : AxB (bool , uint16_t, false) ; - case GB_INT32_code : AxB (bool , int32_t , false) ; - case GB_UINT32_code : AxB (bool , uint32_t, false) ; - case GB_INT64_code : AxB (bool , int64_t , false) ; - case GB_UINT64_code : AxB (bool , uint64_t, false) ; - #undef MULT - 
#define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (bool , float , false) ; - case GB_FP64_code : AxB (bool , double , false) ; - #undef MULT + case GB_INT8_code : AxB (_lxor, mult, _int8 ) + case GB_UINT8_code : AxB (_lxor, mult, _uint8 ) + case GB_INT16_code : AxB (_lxor, mult, _int16 ) + case GB_UINT16_code : AxB (_lxor, mult, _uint16) + case GB_INT32_code : AxB (_lxor, mult, _int32 ) + case GB_UINT32_code : AxB (_lxor, mult, _uint32) + case GB_INT64_code : AxB (_lxor, mult, _int64 ) + case GB_UINT64_code : AxB (_lxor, mult, _uint64) + case GB_FP32_code : AxB (_lxor, mult, _fp32 ) + case GB_FP64_code : AxB (_lxor, mult, _fp64 ) default: ; } break ; - #undef ADD case GB_EQ_opcode : - #define ADD(w,t) w = (w == t) switch (xycode) { - // xycode ztype xytype identity - #define MULT(x,y) IMULT(x,y) #ifndef NO_BOOLEAN - case GB_BOOL_code : AxB (bool , bool , true) ; + case GB_BOOL_code : AxB (_eq, mult, _bool ) #endif - case GB_INT8_code : AxB (bool , int8_t , true) ; - case GB_UINT8_code : AxB (bool , uint8_t , true) ; - case GB_INT16_code : AxB (bool , int16_t , true) ; - case GB_UINT16_code : AxB (bool , uint16_t, true) ; - case GB_INT32_code : AxB (bool , int32_t , true) ; - case GB_UINT32_code : AxB (bool , uint32_t, true) ; - case GB_INT64_code : AxB (bool , int64_t , true) ; - case GB_UINT64_code : AxB (bool , uint64_t, true) ; - #undef MULT - #define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (bool , float , true) ; - case GB_FP64_code : AxB (bool , double , true) ; - #undef MULT + case GB_INT8_code : AxB (_eq, mult, _int8 ) + case GB_UINT8_code : AxB (_eq, mult, _uint8 ) + case GB_INT16_code : AxB (_eq, mult, _int16 ) + case GB_UINT16_code : AxB (_eq, mult, _uint16) + case GB_INT32_code : AxB (_eq, mult, _int32 ) + case GB_UINT32_code : AxB (_eq, mult, _uint32) + case GB_INT64_code : AxB (_eq, mult, _int64 ) + case GB_UINT64_code : AxB (_eq, mult, _uint64) + case GB_FP32_code : AxB (_eq, mult, _fp32 ) + case GB_FP64_code : AxB (_eq, mult, _fp64 ) default: ; } break ; - #undef ADD default: ; } } #undef NO_BOOLEAN -#undef MULT -#undef IMULT -#undef FMULT +#undef mult diff --git a/GraphBLAS/Source/Template/GB_AxB_factory.c b/GraphBLAS/Source/Template/GB_AxB_factory.c index 2960bc3c4d..bd13d003ab 100644 --- a/GraphBLAS/Source/Template/GB_AxB_factory.c +++ b/GraphBLAS/Source/Template/GB_AxB_factory.c @@ -20,8 +20,7 @@ // 44 semirings: (min,max,plus,times) for non-boolean, and // (or,and,xor,eq) for boolean - #define IMULT(x,y) x - #define FMULT(x,y) x + #define mult _first #include "GB_AxB_template.c" break ; @@ -31,8 +30,7 @@ // 44 semirings: (min,max,plus,times) for non-boolean, and // (or,and,xor,eq) for boolean - #define IMULT(x,y) y - #define FMULT(x,y) y + #define mult _second #include "GB_AxB_template.c" break ; @@ -43,8 +41,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // MIN == TIMES == AND for boolean #define NO_BOOLEAN - #define IMULT(x,y) IMIN (x,y) - #define FMULT(x,y) FMIN (x,y) + #define mult _min #include "GB_AxB_template.c" break ; @@ -55,8 +52,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // MAX == PLUS == OR for boolean #define NO_BOOLEAN - #define IMULT(x,y) IMAX (x,y) - #define FMULT(x,y) FMAX (x,y) + #define mult _max #include "GB_AxB_template.c" break ; @@ -67,8 +63,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // MAX == PLUS == OR for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x + y) - #define FMULT(x,y) (x + y) + #define mult _plus #include "GB_AxB_template.c" break ; @@ -79,8 +74,7 @@ // 40 semirings: 
(min,max,plus,times) for non-boolean // MINUS == NE == ISNE == XOR for boolean #define NO_BOOLEAN - #define IMULT(x,y) (flipxy ? (y-x) : (x-y)) - #define FMULT(x,y) (flipxy ? (y-x) : (x-y)) + #define mult _minus #include "GB_AxB_template.c" break ; @@ -91,8 +85,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // MIN == TIMES == AND for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x * y) - #define FMULT(x,y) (x * y) + #define mult _times #include "GB_AxB_template.c" break ; @@ -104,8 +97,7 @@ // FIRST == DIV for boolean // See Source/GB.h for disscusion on integer division #define NO_BOOLEAN - #define IMULT(x,y) (flipxy ? IDIV(y,x) : IDIV(x,y)) - #define FMULT(x,y) (flipxy ? (y/x) : (x/y)) + #define mult _div #include "GB_AxB_template.c" break ; @@ -116,8 +108,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // ISEQ == EQ for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x == y) - #define FMULT(x,y) (x == y) + #define mult _iseq #include "GB_AxB_template.c" break ; @@ -128,8 +119,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // MINUS == NE == ISNE == XOR for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x != y) - #define FMULT(x,y) (x != y) + #define mult _isne #include "GB_AxB_template.c" break ; @@ -140,8 +130,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // ISGT == GT for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x > y) - #define FMULT(x,y) (x > y) + #define mult _isgt #include "GB_AxB_template.c" break ; @@ -152,8 +141,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // ISLT == LT for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x < y) - #define FMULT(x,y) (x < y) + #define mult _islt #include "GB_AxB_template.c" break ; @@ -164,8 +152,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // ISGE == GE for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x >= y) - #define FMULT(x,y) (x >= y) + #define mult _isge #include "GB_AxB_template.c" break ; @@ -176,8 +163,7 @@ // 40 semirings: (min,max,plus,times) for non-boolean // ISLE == LE for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x <= y) - #define FMULT(x,y) (x <= y) + #define mult _isle #include "GB_AxB_template.c" break ; @@ -186,8 +172,7 @@ //---------------------------------------------------------------------- // 44 semirings: (and,or,xor,eq) * (11 types) - #define IMULT(x,y) (x == y) - #define FMULT(x,y) (x == y) + #define mult _eq #include "GB_AxB_compare_template.c" break ; @@ -198,8 +183,7 @@ // 40 semirings: (and,or,xor,eq) * (10 types) // MINUS == NE == ISNE == XOR for boolean #define NO_BOOLEAN - #define IMULT(x,y) (x != y) - #define FMULT(x,y) (x != y) + #define mult _ne #include "GB_AxB_compare_template.c" break ; @@ -208,8 +192,7 @@ //---------------------------------------------------------------------- // 44 semirings: (and,or,xor,eq) * (11 types) - #define IMULT(x,y) (x > y) - #define FMULT(x,y) (x > y) + #define mult _gt #include "GB_AxB_compare_template.c" break ; @@ -218,8 +201,7 @@ //---------------------------------------------------------------------- // 44 semirings: (and,or,xor,eq) * (11 types) - #define IMULT(x,y) (x < y) - #define FMULT(x,y) (x < y) + #define mult _lt #include "GB_AxB_compare_template.c" break ; @@ -228,8 +210,7 @@ //---------------------------------------------------------------------- // 44 semirings: (and,or,xor,eq) * (11 types) - #define IMULT(x,y) (x >= y) - #define FMULT(x,y) (x >= y) + #define mult _ge #include "GB_AxB_compare_template.c" break ; @@ -238,8 +219,7 @@ 
//---------------------------------------------------------------------- // 44 semirings: (and,or,xor,eq) * (11 types) - #define IMULT(x,y) (x <= y) - #define FMULT(x,y) (x <= y) + #define mult _le #include "GB_AxB_compare_template.c" break ; @@ -253,8 +233,7 @@ // 44 semirings: (min,max,plus,times) for non-boolean, and // (or,and,xor,eq) for boolean - #define IMULT(x,y) ((x != 0) || (y != 0)) - #define FMULT(x,y) ((x != 0) || (y != 0)) + #define mult _lor #include "GB_AxB_template.c" break ; @@ -264,8 +243,7 @@ // 44 semirings: (min,max,plus,times) for non-boolean, and // (or,and,xor,eq) for boolean - #define IMULT(x,y) ((x != 0) && (y != 0)) - #define FMULT(x,y) ((x != 0) && (y != 0)) + #define mult _land #include "GB_AxB_template.c" break ; @@ -275,8 +253,7 @@ // 44 semirings: (min,max,plus,times) for non-boolean, and // (or,and,xor,eq) for boolean - #define IMULT(x,y) ((x != 0) != (y != 0)) - #define FMULT(x,y) ((x != 0) != (y != 0)) + #define mult _lxor #include "GB_AxB_template.c" break ; diff --git a/GraphBLAS/Source/Template/GB_AxB_template.c b/GraphBLAS/Source/Template/GB_AxB_template.c index 33fb9adb36..b93a21105f 100644 --- a/GraphBLAS/Source/Template/GB_AxB_template.c +++ b/GraphBLAS/Source/Template/GB_AxB_template.c @@ -7,11 +7,9 @@ //------------------------------------------------------------------------------ -// A template file #include'd in GB_AxB_builtin.c +// A template file #include'd in GB_AxB_factory.c -// This file is used for 17 operators, which is #defined as IMULT(x,y) and -// FMULT(x,y) by the including file. IMULT(x,y) is used for integers and -// FMULT(x,y) for floating-point. The multiply operator is combined here +// This file is used for 17 operators. The multiply operator is combined here // with 40 or 44 monoids to create 40 or 44 unique semiring workers. 
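
Each inclusion of GB_AxB_template.c has `mult` #defined to a token such as _times, and each `AxB (add, mult, type)` case must resolve to one of the workers in Generated/, e.g. GB_AxB__times_times_uint64. The factory's actual AxB macro is not shown in this patch; the standalone sketch below (hypothetical PASTE4/WORKER/NAME helpers) only demonstrates the two-step token pasting such a dispatch needs, because an argument that is itself a macro must be expanded before concatenation:

#include <stdio.h>

// paste in two steps so that an argument which is itself a macro (like mult,
// defined by the factory as _times) is expanded before concatenation
#define PASTE4(a,b,c,d)   a ## b ## c ## d
#define WORKER(a,b,c,d)   PASTE4 (a, b, c, d)
#define STR(x)            #x
#define NAME(x)           STR (x)

#define mult _times

int main (void)
{
    // prints "GB_AxB__times_times_uint64": the worker name that the case
    //     case GB_UINT64_code : AxB (_times, mult, _uint64)
    // would dispatch to when mult is _times
    printf ("%s\n", NAME (WORKER (GB_AxB_, _times, mult, _uint64))) ;
    return (0) ;
}
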
// FIRST, SECOND, MIN, MAX, PLUS, MINUS, TIMES, DIV, @@ -37,25 +35,16 @@ if (zcode != GB_BOOL_code) switch (zcode) { - // zcode ztype xytype identity - #define ADD(w,t) w = IMIN (w,t) - #define MULT(x,y) IMULT(x,y) - case GB_INT8_code : AxB (int8_t , int8_t , INT8_MAX) ; - case GB_UINT8_code : AxB (uint8_t , uint8_t , UINT8_MAX) ; - case GB_INT16_code : AxB (int16_t , int16_t , INT16_MAX) ; - case GB_UINT16_code : AxB (uint16_t, uint16_t, UINT16_MAX) ; - case GB_INT32_code : AxB (int32_t , int32_t , INT32_MAX) ; - case GB_UINT32_code : AxB (uint32_t, uint32_t, UINT32_MAX) ; - case GB_INT64_code : AxB (int64_t , int64_t , INT64_MAX) ; - case GB_UINT64_code : AxB (uint64_t, uint64_t, UINT64_MAX) ; - #undef ADD - #undef MULT - #define ADD(w,t) w = FMIN (w,t) - #define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (float , float , INFINITY) ; - case GB_FP64_code : AxB (double , double , INFINITY) ; - #undef ADD - #undef MULT + case GB_INT8_code : AxB (_min, mult, _int8 ) + case GB_UINT8_code : AxB (_min, mult, _uint8 ) + case GB_INT16_code : AxB (_min, mult, _int16 ) + case GB_UINT16_code : AxB (_min, mult, _uint16) + case GB_INT32_code : AxB (_min, mult, _int32 ) + case GB_UINT32_code : AxB (_min, mult, _uint32) + case GB_INT64_code : AxB (_min, mult, _int64 ) + case GB_UINT64_code : AxB (_min, mult, _uint64) + case GB_FP32_code : AxB (_min, mult, _fp32 ) + case GB_FP64_code : AxB (_min, mult, _fp64 ) default: ; } break ; @@ -64,129 +53,74 @@ if (zcode != GB_BOOL_code) switch (zcode) { - // zcode ztype xytype identity - #define ADD(w,t) w = IMAX (w,t) - #define MULT(x,y) IMULT(x,y) - case GB_INT8_code : AxB (int8_t , int8_t , INT8_MIN) ; - case GB_UINT8_code : AxB (uint8_t , uint8_t , 0) ; - case GB_INT16_code : AxB (int16_t , int16_t , INT16_MIN) ; - case GB_UINT16_code : AxB (uint16_t, uint16_t, 0) ; - case GB_INT32_code : AxB (int32_t , int32_t , INT32_MIN) ; - case GB_UINT32_code : AxB (uint32_t, uint32_t, 0) ; - case GB_INT64_code : AxB (int64_t , int64_t , INT64_MIN) ; - case GB_UINT64_code : AxB (uint64_t, uint64_t, 0) ; - #undef ADD - #undef MULT - #define ADD(w,t) w = FMAX (w,t) - #define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (float , float , -INFINITY) ; - case GB_FP64_code : AxB (double , double , -INFINITY) ; - #undef ADD - #undef MULT + case GB_INT8_code : AxB (_max, mult, _int8 ) + case GB_UINT8_code : AxB (_max, mult, _uint8 ) + case GB_INT16_code : AxB (_max, mult, _int16 ) + case GB_UINT16_code : AxB (_max, mult, _uint16) + case GB_INT32_code : AxB (_max, mult, _int32 ) + case GB_UINT32_code : AxB (_max, mult, _uint32) + case GB_INT64_code : AxB (_max, mult, _int64 ) + case GB_UINT64_code : AxB (_max, mult, _uint64) + case GB_FP32_code : AxB (_max, mult, _fp32 ) + case GB_FP64_code : AxB (_max, mult, _fp64 ) default: ; } break ; case GB_PLUS_opcode : // w += t, identity is 0 - #define ADD(w,t) w += t switch (zcode) { - // zcode ztype xytype identity - #define MULT(x,y) IMULT(x,y) - case GB_INT8_code : AxB (int8_t , int8_t , 0) ; - case GB_UINT8_code : AxB (uint8_t , uint8_t , 0) ; - case GB_INT16_code : AxB (int16_t , int16_t , 0) ; - case GB_UINT16_code : AxB (uint16_t, uint16_t, 0) ; - case GB_INT32_code : AxB (int32_t , int32_t , 0) ; - case GB_UINT32_code : AxB (uint32_t, uint32_t, 0) ; - case GB_INT64_code : AxB (int64_t , int64_t , 0) ; - case GB_UINT64_code : AxB (uint64_t, uint64_t, 0) ; - #undef MULT - #define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (float , float , 0) ; - case GB_FP64_code : AxB (double , double , 0) ; - #undef MULT + case 
GB_INT8_code : AxB (_plus, mult, _int8 ) + case GB_UINT8_code : AxB (_plus, mult, _uint8 ) + case GB_INT16_code : AxB (_plus, mult, _int16 ) + case GB_UINT16_code : AxB (_plus, mult, _uint16) + case GB_INT32_code : AxB (_plus, mult, _int32 ) + case GB_UINT32_code : AxB (_plus, mult, _uint32) + case GB_INT64_code : AxB (_plus, mult, _int64 ) + case GB_UINT64_code : AxB (_plus, mult, _uint64) + case GB_FP32_code : AxB (_plus, mult, _fp32 ) + case GB_FP64_code : AxB (_plus, mult, _fp64 ) default: ; } break ; - #undef ADD case GB_TIMES_opcode : // w *= t, identity is 1 - #define ADD(w,t) w *= t switch (zcode) { - // zcode ztype xytype identity - #define MULT(x,y) IMULT(x,y) - case GB_INT8_code : AxB (int8_t , int8_t , 1) ; - case GB_UINT8_code : AxB (uint8_t , uint8_t , 1) ; - case GB_INT16_code : AxB (int16_t , int16_t , 1) ; - case GB_UINT16_code : AxB (uint16_t, uint16_t, 1) ; - case GB_INT32_code : AxB (int32_t , int32_t , 1) ; - case GB_UINT32_code : AxB (uint32_t, uint32_t, 1) ; - case GB_INT64_code : AxB (int64_t , int64_t , 1) ; - case GB_UINT64_code : AxB (uint64_t, uint64_t, 1) ; - #undef MULT - #define MULT(x,y) FMULT(x,y) - case GB_FP32_code : AxB (float , float , 1) ; - case GB_FP64_code : AxB (double , double , 1) ; - #undef MULT + case GB_INT8_code : AxB (_times, mult, _int8 ) + case GB_UINT8_code : AxB (_times, mult, _uint8 ) + case GB_INT16_code : AxB (_times, mult, _int16 ) + case GB_UINT16_code : AxB (_times, mult, _uint16) + case GB_INT32_code : AxB (_times, mult, _int32 ) + case GB_UINT32_code : AxB (_times, mult, _uint32) + case GB_INT64_code : AxB (_times, mult, _int64 ) + case GB_UINT64_code : AxB (_times, mult, _uint64) + case GB_FP32_code : AxB (_times, mult, _fp32 ) + case GB_FP64_code : AxB (_times, mult, _fp64 ) default: ; } break ; - #undef ADD default: ; } - } #ifndef NO_BOOLEAN else { - - #define MULT(x,y) IMULT(x,y) switch (add_opcode) { - - case GB_LOR_opcode : - - #define ADD(w,t) w = (w || t) - // ztype xytype identity - AxB (bool, bool, false) ; - #undef ADD - - case GB_LAND_opcode : - - #define ADD(w,t) w = (w && t) - // ztype xytype identity - AxB (bool, bool, true) ; - #undef ADD - - case GB_LXOR_opcode : - - #define ADD(w,t) w = (w != t) - // ztype xytype identity - AxB (bool, bool, false) ; - #undef ADD - - case GB_EQ_opcode : - - #define ADD(w,t) w = (w == t) - // ztype xytype identity - AxB (bool, bool, true) ; - #undef ADD - + case GB_LOR_opcode : AxB (_lor , mult, _bool) + case GB_LAND_opcode : AxB (_land, mult, _bool) + case GB_LXOR_opcode : AxB (_lxor, mult, _bool) + case GB_EQ_opcode : AxB (_eq , mult, _bool) default: ; } - #undef MULT - } #endif #undef NO_BOOLEAN -#undef MULT -#undef IMULT -#undef FMULT +#undef mult diff --git a/GraphBLAS/Source/axb.m b/GraphBLAS/Source/axb.m new file mode 100644 index 0000000000..d2ae829952 --- /dev/null +++ b/GraphBLAS/Source/axb.m @@ -0,0 +1,46 @@ +function axb +%AXB: create all C=A*B functions for all semirings +% +% This function creates all files of the form GB_AxB__*.[ch], including 960 +% semirings (GB_AxB__*.c) and one include file, GB_AxB__semirings.h. + +f = fopen ('Generated/GB_AxB__semirings.h', 'w') ; +fprintf (f, '//------------------------------------------------------------------------------\n') ; +fprintf (f, '// GB_AxB__semirings.h: definitions for GB_AxB__*.c\n') ; +fprintf (f, '//------------------------------------------------------------------------------\n') ; +fprintf (f, '\n') ; +fprintf (f, '// SuiteSparse:GraphBLAS, Timothy A. 
Davis, (c) 2017, All Rights Reserved.\n') ; +fprintf (f, '// http://suitesparse.com See GraphBLAS/Doc/License.txt for license.\n') ; +fprintf (f, '\n') ; +fprintf (f, '// This file has been automatically generated from Template/GB_AxB.h') ; +fprintf (f, '\n\n') ; +fclose (f) ; + +axb_template ('first', 1, 'x') ; +axb_template ('second', 1, 'y') ; +axb_template ('min', 0, 'IMIN(x,y)', 'FMIN(x,y)') ; +axb_template ('max', 0, 'IMAX(x,y)', 'FMAX(x,y)') ; +axb_template ('plus', 0, 'x + y') ; +axb_template ('minus', 0, 'flip ? (y-x) : (x-y)') ; +axb_template ('times', 0, 'x * y') ; +axb_template ('div', 0, 'flip ? IDIV(y,x) : IDIV(x,y)', ... + 'flip ? (y/x) : (x/y)') ; + +axb_template ('iseq', 0, 'x == y') ; +axb_template ('isne', 0, 'x != y') ; +axb_template ('isgt', 0, 'x > y') ; +axb_template ('islt', 0, 'x < y') ; +axb_template ('isge', 0, 'x >= y') ; +axb_template ('isle', 0, 'x <= y') ; + +axb_compare_template ('eq', 1, 'x == y') ; +axb_compare_template ('ne', 0, 'x != y') ; +axb_compare_template ('gt', 1, 'x > y') ; +axb_compare_template ('lt', 1, 'x < y') ; +axb_compare_template ('ge', 1, 'x >= y') ; +axb_compare_template ('le', 1, 'x <= y') ; + +axb_template ('lor', 1, '(x != 0) || (y != 0)') ; +axb_template ('land', 1, '(x != 0) && (y != 0)') ; +axb_template ('lxor', 1, '(x != 0) != (y != 0)') ; + diff --git a/GraphBLAS/Source/axb_compare_template.m b/GraphBLAS/Source/axb_compare_template.m new file mode 100644 index 0000000000..1268818d0e --- /dev/null +++ b/GraphBLAS/Source/axb_compare_template.m @@ -0,0 +1,70 @@ +function axb_compare_template (multop, do_boolean, imult, fmult) + +if (nargin < 4) + fmult = imult ; +end + +% lor monoid +add = 'w = (w || t)' ; +if (do_boolean) +axb_method ('lor', multop, add, imult, 'bool', 'bool' , 'false') ; +end +axb_method ('lor', multop, add, imult, 'bool', 'int8_t' , 'false') ; +axb_method ('lor', multop, add, imult, 'bool', 'uint8_t' , 'false') ; +axb_method ('lor', multop, add, imult, 'bool', 'int16_t' , 'false') ; +axb_method ('lor', multop, add, imult, 'bool', 'uint16_t', 'false') ; +axb_method ('lor', multop, add, imult, 'bool', 'int32_t' , 'false') ; +axb_method ('lor', multop, add, imult, 'bool', 'uint32_t', 'false') ; +axb_method ('lor', multop, add, imult, 'bool', 'int64_t' , 'false') ; +axb_method ('lor', multop, add, imult, 'bool', 'uint64_t', 'false') ; +axb_method ('lor', multop, add, fmult, 'bool', 'float' , 'false') ; +axb_method ('lor', multop, add, fmult, 'bool', 'double' , 'false') ; + +% land monoid +add = 'w = (w && t)' ; +if (do_boolean) +axb_method ('land', multop, add, imult, 'bool', 'bool' , 'true') ; +end +axb_method ('land', multop, add, imult, 'bool', 'int8_t' , 'true') ; +axb_method ('land', multop, add, imult, 'bool', 'uint8_t' , 'true') ; +axb_method ('land', multop, add, imult, 'bool', 'int16_t' , 'true') ; +axb_method ('land', multop, add, imult, 'bool', 'uint16_t', 'true') ; +axb_method ('land', multop, add, imult, 'bool', 'int32_t' , 'true') ; +axb_method ('land', multop, add, imult, 'bool', 'uint32_t', 'true') ; +axb_method ('land', multop, add, imult, 'bool', 'int64_t' , 'true') ; +axb_method ('land', multop, add, imult, 'bool', 'uint64_t', 'true') ; +axb_method ('land', multop, add, fmult, 'bool', 'float' , 'true') ; +axb_method ('land', multop, add, fmult, 'bool', 'double' , 'true') ; + +% lxor monoid +add = 'w = (w != t)' ; +if (do_boolean) +axb_method ('lxor', multop, add, imult, 'bool', 'bool' , 'false') ; +end +axb_method ('lxor', multop, add, imult, 'bool', 'int8_t' , 'false') ; +axb_method ('lxor', 
multop, add, imult, 'bool', 'uint8_t' , 'false') ; +axb_method ('lxor', multop, add, imult, 'bool', 'int16_t' , 'false') ; +axb_method ('lxor', multop, add, imult, 'bool', 'uint16_t', 'false') ; +axb_method ('lxor', multop, add, imult, 'bool', 'int32_t' , 'false') ; +axb_method ('lxor', multop, add, imult, 'bool', 'uint32_t', 'false') ; +axb_method ('lxor', multop, add, imult, 'bool', 'int64_t' , 'false') ; +axb_method ('lxor', multop, add, imult, 'bool', 'uint64_t', 'false') ; +axb_method ('lxor', multop, add, fmult, 'bool', 'float' , 'false') ; +axb_method ('lxor', multop, add, fmult, 'bool', 'double' , 'false') ; + +% eq monoid +add = 'w = (w == t)' ; +if (do_boolean) +axb_method ('eq', multop, add, imult, 'bool', 'bool' , 'true') ; +end +axb_method ('eq', multop, add, imult, 'bool', 'int8_t' , 'true') ; +axb_method ('eq', multop, add, imult, 'bool', 'uint8_t' , 'true') ; +axb_method ('eq', multop, add, imult, 'bool', 'int16_t' , 'true') ; +axb_method ('eq', multop, add, imult, 'bool', 'uint16_t', 'true') ; +axb_method ('eq', multop, add, imult, 'bool', 'int32_t' , 'true') ; +axb_method ('eq', multop, add, imult, 'bool', 'uint32_t', 'true') ; +axb_method ('eq', multop, add, imult, 'bool', 'int64_t' , 'true') ; +axb_method ('eq', multop, add, imult, 'bool', 'uint64_t', 'true') ; +axb_method ('eq', multop, add, fmult, 'bool', 'float' , 'true') ; +axb_method ('eq', multop, add, fmult, 'bool', 'double' , 'true') ; + diff --git a/GraphBLAS/Source/axb_method.m b/GraphBLAS/Source/axb_method.m new file mode 100644 index 0000000000..f451de7ae0 --- /dev/null +++ b/GraphBLAS/Source/axb_method.m @@ -0,0 +1,64 @@ +function axb_method (addop, multop, add, mult, ztype, xytype, identity) +%AXB_METHOD create a function to compute C=A*B over a semiring +% +% axb_method (addop, multop, add, mult, ztype, xytype, identity) + +f = fopen ('control.m4', 'w') ; + +switch (xytype) + case 'bool' + fname = 'bool' ; + case 'int8_t' + fname = 'int8' ; + case 'uint8_t' + fname = 'uint8' ; + case 'int16_t' + fname = 'int16' ; + case 'uint16_t' + fname = 'uint16' ; + case 'int32_t' + fname = 'int32' ; + case 'uint32_t' + fname = 'uint32' ; + case 'int64_t' + fname = 'int64' ; + case 'uint64_t' + fname = 'uint64' ; + case 'float' + fname = 'fp32' ; + case 'double' + fname = 'fp64' ; +end + +name = sprintf ('%s_%s_%s', addop, multop, fname) ; + +fprintf (f, 'define(`GB_AxB'', `GB_AxB__%s'')\n', name) ; +fprintf (f, 'define(`GB_AdotB'', `GB_AdotB__%s'')\n', name) ; +fprintf (f, 'define(`ztype'', `%s'')\n', ztype) ; +fprintf (f, 'define(`xytype'', `%s'')\n', xytype) ; +fprintf (f, 'define(`identity'', `%s'')\n', identity) ; + +mult = strrep (mult, 'x', '`$1''') ; +mult = strrep (mult, 'y', '`$2''') ; +fprintf (f, 'define(`MULT'', `%s'')\n', mult) ; + +add = strrep (add, 'w', '`$1''') ; +add = strrep (add, 't', '`$2''') ; +fprintf (f, 'define(`ADD'', `%s'')\n', add) ; + +fclose (f) ; +% type control.m4 + +cmd = sprintf (... +'cat control.m4 Template/GB_AxB.c | m4 | tail +8 > Generated/GB_AxB__%s.c', ... +name) ; +fprintf ('%s\n', cmd) ; +system (cmd) ; + +cmd = sprintf (... 
+'cat control.m4 Template/GB_AxB.h | m4 | tail +8 >> Generated/GB_AxB__semirings.h') ; +% fprintf ('%s\n', cmd) ; +system (cmd) ; + +delete ('control.m4') ; + diff --git a/GraphBLAS/Source/axb_template.m b/GraphBLAS/Source/axb_template.m new file mode 100644 index 0000000000..6326c7d3d9 --- /dev/null +++ b/GraphBLAS/Source/axb_template.m @@ -0,0 +1,68 @@ +function axb_template (multop, do_boolean, imult, fmult) + +if (nargin < 4) + fmult = imult ; +end + +% min monoid +add = 'w = IMIN (w,t)' ; +axb_method ('min', multop, add, imult, 'int8_t' , 'int8_t' , 'INT8_MAX') ; +axb_method ('min', multop, add, imult, 'uint8_t' , 'uint8_t' , 'UINT8_MAX') ; +axb_method ('min', multop, add, imult, 'int16_t' , 'int16_t' , 'INT16_MAX') ; +axb_method ('min', multop, add, imult, 'uint16_t', 'uint16_t', 'UINT16_MAX') ; +axb_method ('min', multop, add, imult, 'int32_t' , 'int32_t' , 'INT32_MAX') ; +axb_method ('min', multop, add, imult, 'uint32_t', 'uint32_t', 'UINT32_MAX') ; +axb_method ('min', multop, add, imult, 'int64_t' , 'int64_t' , 'INT64_MAX') ; +axb_method ('min', multop, add, imult, 'uint64_t', 'uint64_t', 'UINT64_MAX') ; +add = 'w = FMIN (w,t)' ; +axb_method ('min', multop, add, fmult, 'float' , 'float' , 'INFINITY') ; +axb_method ('min', multop, add, fmult, 'double' , 'double' , 'INFINITY') ; + +% max monoid +add = 'w = IMAX (w,t)' ; +axb_method ('max', multop, add, imult, 'int8_t' , 'int8_t' , 'INT8_MIN') ; +axb_method ('max', multop, add, imult, 'uint8_t' , 'uint8_t' , '0') ; +axb_method ('max', multop, add, imult, 'int16_t' , 'int16_t' , 'INT16_MIN') ; +axb_method ('max', multop, add, imult, 'uint16_t', 'uint16_t', '0') ; +axb_method ('max', multop, add, imult, 'int32_t' , 'int32_t' , 'INT32_MIN') ; +axb_method ('max', multop, add, imult, 'uint32_t', 'uint32_t', '0') ; +axb_method ('max', multop, add, imult, 'int64_t' , 'int64_t' , 'INT64_MIN') ; +axb_method ('max', multop, add, imult, 'uint64_t', 'uint64_t', '0') ; +add = 'w = FMAX (w,t)' ; +axb_method ('max', multop, add, fmult, 'float' , 'float' , '-INFINITY') ; +axb_method ('max', multop, add, fmult, 'double' , 'double' , '-INFINITY') ; + +% plus monoid +add = 'w += t' ; +axb_method ('plus', multop, add, imult, 'int8_t' , 'int8_t' , '0') ; +axb_method ('plus', multop, add, imult, 'uint8_t' , 'uint8_t' , '0') ; +axb_method ('plus', multop, add, imult, 'int16_t' , 'int16_t' , '0') ; +axb_method ('plus', multop, add, imult, 'uint16_t', 'uint16_t', '0') ; +axb_method ('plus', multop, add, imult, 'int32_t' , 'int32_t' , '0') ; +axb_method ('plus', multop, add, imult, 'uint32_t', 'uint32_t', '0') ; +axb_method ('plus', multop, add, imult, 'int64_t' , 'int64_t' , '0') ; +axb_method ('plus', multop, add, imult, 'uint64_t', 'uint64_t', '0') ; +axb_method ('plus', multop, add, fmult, 'float' , 'float' , '0') ; +axb_method ('plus', multop, add, fmult, 'double' , 'double' , '0') ; + +% times monoid +add = 'w *= t' ; +axb_method ('times', multop, add, imult, 'int8_t' , 'int8_t' , '1') ; +axb_method ('times', multop, add, imult, 'uint8_t' , 'uint8_t' , '1') ; +axb_method ('times', multop, add, imult, 'int16_t' , 'int16_t' , '1') ; +axb_method ('times', multop, add, imult, 'uint16_t', 'uint16_t', '1') ; +axb_method ('times', multop, add, imult, 'int32_t' , 'int32_t' , '1') ; +axb_method ('times', multop, add, imult, 'uint32_t', 'uint32_t', '1') ; +axb_method ('times', multop, add, imult, 'int64_t' , 'int64_t' , '1') ; +axb_method ('times', multop, add, imult, 'uint64_t', 'uint64_t', '1') ; +axb_method ('times', multop, add, fmult, 'float' , 'float' , 
'1') ; +axb_method ('times', multop, add, fmult, 'double' , 'double' , '1') ; + +% boolean monoids +if (do_boolean) + axb_method ('lor', multop, 'w = (w || t)', imult, 'bool', 'bool', 'false'); + axb_method ('land', multop, 'w = (w && t)', imult, 'bool', 'bool', 'true') ; + axb_method ('lxor', multop, 'w = (w != t)', imult, 'bool', 'bool', 'false'); + axb_method ('eq', multop, 'w = (w == t)', imult, 'bool', 'bool', 'true') ; +end + diff --git a/GraphBLAS/Tcov/Makefile b/GraphBLAS/Tcov/Makefile index a45b78a4c3..970a027cc1 100644 --- a/GraphBLAS/Tcov/Makefile +++ b/GraphBLAS/Tcov/Makefile @@ -9,7 +9,8 @@ # This Makefile is only intended to clean up the compiled files and files # created by the gbcover.m function in MATLAB. To compile GraphBLAS for -# statement coverage testing, type 'gbcover' in MATLAB. +# statement coverage testing, type 'gbcover' in MATLAB. To compile the tests +# and run them, type 'testcov' in MATLAB. clean: $(RM) *.o *.obj diff --git a/GraphBLAS/Tcov/README.txt b/GraphBLAS/Tcov/README.txt index 701b47f34e..382280ae3b 100644 --- a/GraphBLAS/Tcov/README.txt +++ b/GraphBLAS/Tcov/README.txt @@ -45,4 +45,5 @@ Files in GraphBLAS/Tcov: gbcover_finish.c save the last test coverage counter gbcover_start.c declare the test coverage counter array gbcover_util.c get/put the coverage to/from MATLAB - log_Nov25.txt 100% test coverage certificate + log_*.txt 100% test coverage certificate + diff --git a/GraphBLAS/Tcov/gbcover.m b/GraphBLAS/Tcov/gbcover.m index d152844d5f..0cf2888171 100644 --- a/GraphBLAS/Tcov/gbcover.m +++ b/GraphBLAS/Tcov/gbcover.m @@ -20,6 +20,7 @@ % create the single cover_gb.c for all other GraphBLAS source files cfiles = [ dir('gbcover_start.c') ; ... dir('../Source/*.c') ; ... + dir('../Source/Generated/*.c') ; ... dir('../Demo/Source/usercomplex.c') ; ... dir('../Demo/Source/simple_rand.c') ; ... dir('../Demo/Source/random_matrix.c') ; ... @@ -56,7 +57,7 @@ dir('*.h') ] ; % list of include directories -inc = '-I../Include -I../Source -I../Source/Template' ; +inc = '-I../Include -I../Source -I../Source/Template -I../Source/Generated' ; inc = [inc ' -I../Test -I../Test/Template -I. 
-I../Demo/Include'] ; % gbmake is in ../Test diff --git a/GraphBLAS/Tcov/gbcover_start.c b/GraphBLAS/Tcov/gbcover_start.c index be06db48eb..f06bb7438c 100644 --- a/GraphBLAS/Tcov/gbcover_start.c +++ b/GraphBLAS/Tcov/gbcover_start.c @@ -17,7 +17,7 @@ #include "GB.h" -#define GBCOVER_MAX 40000 +#define GBCOVER_MAX 80000 int64_t gbcov [GBCOVER_MAX] ; extern int gbcover_max ; diff --git a/GraphBLAS/Tcov/log.txt b/GraphBLAS/Tcov/log.txt deleted file mode 100644 index 2c30ac51a6..0000000000 --- a/GraphBLAS/Tcov/log.txt +++ /dev/null @@ -1,37 +0,0 @@ - ----------------------------------------------- [malloc] [cover] -02-Dec-2017 21:46:33 test98 0.4 sec coverage: 170 of 34187 ( 0.5%) -02-Dec-2017 21:46:34 test97 0.1 sec coverage: 317 of 34187 ( 0.9%) -02-Dec-2017 21:46:34 test01 0.0 sec coverage: 1014 of 34187 ( 3.0%) -02-Dec-2017 21:46:34 test02 0.3 sec coverage: 1349 of 34187 ( 3.9%) -02-Dec-2017 21:46:34 test03 0.1 sec coverage: 1375 of 34187 ( 4.0%) -02-Dec-2017 21:46:34 test04 0.1 sec coverage: 1433 of 34187 ( 4.2%) -02-Dec-2017 21:46:34 test05 0.0 sec coverage: 1434 of 34187 ( 4.2%) -02-Dec-2017 21:46:34 test07 0.0 sec coverage: 1463 of 34187 ( 4.3%) -02-Dec-2017 21:46:34 test07b 0.0 sec coverage: 1469 of 34187 ( 4.3%) -02-Dec-2017 21:46:35 test08 0.6 sec coverage: 1485 of 34187 ( 4.3%) -02-Dec-2017 21:46:35 test09 0.0 sec coverage: 1493 of 34187 ( 4.4%) -02-Dec-2017 21:46:35 test13 0.0 sec coverage: 1499 of 34187 ( 4.4%) -02-Dec-2017 21:46:35 test15 0.1 sec coverage: 1567 of 34187 ( 4.6%) -02-Dec-2017 21:46:35 test17 0.5 sec coverage: 1570 of 34187 ( 4.6%) -02-Dec-2017 21:46:36 test72 0.7 sec coverage: 1627 of 34187 ( 4.8%) -02-Dec-2017 21:46:39 test20 3.3 sec coverage: 1795 of 34187 ( 5.3%) -02-Dec-2017 21:46:54 test25 15.0 sec coverage: 1900 of 34187 ( 5.6%) -02-Dec-2017 21:46:55 test26 0.2 sec coverage: 1900 of 34187 ( 5.6%) -02-Dec-2017 21:47:10 test27 13.2 sec coverage: 1908 of 34187 ( 5.6%) -02-Dec-2017 21:47:12 test11 2.2 sec coverage: 1910 of 34187 ( 5.6%) -02-Dec-2017 21:47:15 test14 2.2 sec coverage: 2561 of 34187 ( 7.5%) -02-Dec-2017 21:47:18 test00 3.9 sec coverage: 2584 of 34187 ( 7.6%) -02-Dec-2017 21:47:30 test19 10.0 sec coverage: 2659 of 34187 ( 7.8%) -02-Dec-2017 21:47:42 test12 11.3 sec coverage: 2691 of 34187 ( 7.9%) -02-Dec-2017 21:47:55 test10 13.0 sec coverage: 7985 of 34187 ( 23.4%) -02-Dec-2017 21:48:12 test74 17.4 sec coverage: 16773 of 34187 ( 49.1%) -02-Dec-2017 21:48:29 test99 15.2 sec coverage: 16816 of 34187 ( 49.2%) -02-Dec-2017 21:48:45 test23 14.9 sec coverage: 17146 of 34187 ( 50.2%) -02-Dec-2017 21:49:08 test18 22.1 sec coverage: 17174 of 34187 ( 50.2%) -02-Dec-2017 21:49:44 test16 32.2 sec coverage: 17267 of 34187 ( 50.5%) -02-Dec-2017 21:50:33 test24 45.1 sec coverage: 17274 of 34187 ( 50.5%) -02-Dec-2017 21:51:32 test21 56.5 sec coverage: 17280 of 34187 ( 50.5%) -02-Dec-2017 21:53:28 test06 105.2 sec coverage: 33739 of 34187 ( 98.7%) -02-Dec-2017 21:58:44 test19b 280.8 sec coverage: 33811 of 34187 ( 98.9%) -02-Dec-2017 22:04:21 test22 301.1 sec coverage: 34187 of 34187 (100.0%) diff --git a/GraphBLAS/Tcov/log_Dec28.txt b/GraphBLAS/Tcov/log_Dec28.txt new file mode 100644 index 0000000000..c3ee638bbe --- /dev/null +++ b/GraphBLAS/Tcov/log_Dec28.txt @@ -0,0 +1,41 @@ + +---------------------------------------------- [malloc] [cover] +28-Dec-2017 13:21:38 test98 0.3 sec coverage: 179 of 62233 ( 0.3%) +28-Dec-2017 13:21:38 test97 0.1 sec coverage: 325 of 62233 ( 0.5%) +28-Dec-2017 13:21:38 test01 0.0 sec coverage: 1225 of 62233 ( 2.0%) +28-Dec-2017 13:21:39 
test02 0.2 sec coverage: 1559 of 62233 ( 2.5%) +28-Dec-2017 13:21:39 test03 0.1 sec coverage: 1584 of 62233 ( 2.5%) +28-Dec-2017 13:21:39 test04 0.1 sec coverage: 1609 of 62233 ( 2.6%) +28-Dec-2017 13:21:39 test05 0.0 sec coverage: 1610 of 62233 ( 2.6%) +28-Dec-2017 13:21:39 test07 0.0 sec coverage: 1626 of 62233 ( 2.6%) +28-Dec-2017 13:21:39 test07b 0.0 sec coverage: 1630 of 62233 ( 2.6%) +28-Dec-2017 13:21:39 test08 0.6 sec coverage: 1633 of 62233 ( 2.6%) +28-Dec-2017 13:21:39 test09 0.0 sec coverage: 1633 of 62233 ( 2.6%) +28-Dec-2017 13:21:39 test13 0.0 sec coverage: 1638 of 62233 ( 2.6%) +28-Dec-2017 13:21:39 test15 0.1 sec coverage: 1673 of 62233 ( 2.7%) +28-Dec-2017 13:21:40 test17 0.6 sec coverage: 1676 of 62233 ( 2.7%) +28-Dec-2017 13:21:40 test72 0.6 sec coverage: 1724 of 62233 ( 2.8%) +28-Dec-2017 13:21:41 test26 0.4 sec coverage: 1746 of 62233 ( 2.8%) +28-Dec-2017 13:21:41 test29 0.6 sec coverage: 2077 of 62233 ( 3.3%) +28-Dec-2017 13:21:42 test69 1.1 sec coverage: 2101 of 62233 ( 3.4%) +28-Dec-2017 13:21:43 test28 0.2 sec coverage: 2108 of 62233 ( 3.4%) +28-Dec-2017 13:21:45 test11 2.5 sec coverage: 2110 of 62233 ( 3.4%) +28-Dec-2017 13:21:48 test14 2.7 sec coverage: 2618 of 62233 ( 4.2%) +28-Dec-2017 13:21:51 test20 3.2 sec coverage: 2753 of 62233 ( 4.4%) +28-Dec-2017 13:21:54 test00 3.9 sec coverage: 2775 of 62233 ( 4.5%) +28-Dec-2017 13:22:03 test19 9.5 sec coverage: 2818 of 62233 ( 4.5%) +28-Dec-2017 13:22:13 test12 9.6 sec coverage: 2850 of 62233 ( 4.6%) +28-Dec-2017 13:22:28 test10 16.3 sec coverage: 8131 of 62233 ( 13.1%) +28-Dec-2017 13:22:46 test27 18.0 sec coverage: 8141 of 62233 ( 13.1%) +28-Dec-2017 13:23:03 test25 17.3 sec coverage: 8191 of 62233 ( 13.2%) +28-Dec-2017 13:23:19 test74 18.6 sec coverage: 22733 of 62233 ( 36.5%) +28-Dec-2017 13:23:37 test99 18.0 sec coverage: 22771 of 62233 ( 36.6%) +28-Dec-2017 13:23:55 test23 18.1 sec coverage: 23100 of 62233 ( 37.1%) +28-Dec-2017 13:24:18 test18 25.4 sec coverage: 23122 of 62233 ( 37.2%) +28-Dec-2017 13:24:57 test16 39.7 sec coverage: 23252 of 62233 ( 37.4%) +28-Dec-2017 13:25:45 test24 49.4 sec coverage: 23257 of 62233 ( 37.4%) +28-Dec-2017 13:26:43 test21 60.7 sec coverage: 23261 of 62233 ( 37.4%) +28-Dec-2017 13:28:53 test06 128.4 sec coverage: 40668 of 62233 ( 65.3%) +28-Dec-2017 13:34:39 test75 343.1 sec coverage: 61786 of 62233 ( 99.3%) +28-Dec-2017 13:38:50 test19b 248.8 sec coverage: 61857 of 62233 ( 99.4%) +28-Dec-2017 13:44:36 test22 344.9 sec coverage: 62233 of 62233 (100.0%) diff --git a/GraphBLAS/Test/Contents.m b/GraphBLAS/Test/Contents.m index 7d26f5cd4f..1fb724367b 100644 --- a/GraphBLAS/Test/Contents.m +++ b/GraphBLAS/Test/Contents.m @@ -22,7 +22,7 @@ % GB_spec_eWiseMult_Matrix - a MATLAB mimic of GrB_eWiseMult_Matrix % GB_spec_eWiseMult_Vector - a MATLAB mimic of GrB_eWiseMult_Vector % GB_spec_extractTuples - a MATLAB mimic of GrB_*_extractTuples -% GB_spec_identity - the additive identity of a semiring +% GB_spec_identity - the additive identity of a monoid % GB_spec_mask - a pure MATLAB implementation of GrB_mask % GB_spec_matrix - a MATLAB mimic that conforms a matrix to the GraphBLAS spec % GB_spec_mxm - a MATLAB mimic of GrB_mxm @@ -33,6 +33,7 @@ % GB_spec_random - generate random matrix % GB_spec_reduce_to_scalar - a MATLAB mimic of GrB_reduce (to scalar) % GB_spec_reduce_to_vector - a MATLAB mimic of GrB_reduce (to vector) +% GB_spec_select - a MATLAB mimic of GxB_select % GB_spec_semiring - create a semiring % GB_spec_subassign - a MATLAB mimic of GxB_subassign % GB_spec_transpose - a 
MATLAB mimic of GrB_transpose @@ -76,6 +77,11 @@ % test22 - test GrB_transpose % test23 - test GrB_*_build % test24 - test GrB_reduce +% test25 - test GxB_select +% test26 - performance test for GxB_select +% test27 - test GxB_select with user-defined select op (band) +% test28 - test mxm with aliased inputs, C = accum(C,C*C) +% test29 - GrB_reduce with zombies % test30 - test GxB_subassign % test30b - test GrB_assign % test31 - test GrB_transpose @@ -120,6 +126,11 @@ % test66 - test GrB_reduce % test67 - test GrB_apply % test68 - performance tests for eWiseMult +% test69 - test GrB_assign with aliased inputs, C(:,:) = accum(C(:,:),C) +% test72 - special cases for mxm, ewise, ... +% test73 - performance of C = A*B, with mask +% test74 - test GrB_mxm: dot product method +% test75 - test GrB_mxm and GrB_vxm on all semirings (A'B dot product) % test97 - test GB_assign, scalar expansion and zombies % test98 - test GB_mxm, typecasting on the fly % test99 - test GB_mex_transpose with explicit zeros in the Mask @@ -135,9 +146,6 @@ % testca - test complex mxm, mxv, and vxm % testcb - test complex reduce % testcc - test complex transpose -% test72 - special cases for mxm, ewise, ... -% test74 - test GrB_mxm: dot product method -% test73 - performance of C = A*B, with mask % % Helper functions % diff --git a/GraphBLAS/Test/GB_mex.h b/GraphBLAS/Test/GB_mex.h index 89f047e2f4..6b598b1ffa 100644 --- a/GraphBLAS/Test/GB_mex.h +++ b/GraphBLAS/Test/GB_mex.h @@ -218,15 +218,37 @@ void GB_mx_complex_split // split complex array to real/imag part for MATLAB mxArray *Y // MATLAB array with n elements ) ; +bool GB_mx_same // true if arrays X and Y are the same +( + char *X, + char *Y, + int64_t len // length of X and Y +) ; + +bool GB_mx_xsame // true if arrays X and Y are the same (ignoring zombies) +( + char *X, + char *Y, + int64_t len, // length of X and Y + size_t s, // size of each entry of X and Y + int64_t *I // row indices (for zombies), same length as X and Y +) ; + +bool GB_mx_isequal // true if A and B are exactly the same +( + GrB_Matrix A, + GrB_Matrix B +) ; + #ifdef PRINT_MALLOC #define AS_IF_FREE(p) \ { \ - GB_thread_local.nmalloc-- ; \ + GB_Global.nmalloc-- ; \ printf ("\nfree: to MATLAB (%s) line %d file %s\n",\ GB_STR(p), __LINE__,__FILE__); \ printf ("free: %14p %3d %1d\n", \ - p, GB_thread_local.nmalloc, GB_thread_local.malloc_debug) ; \ + p, GB_Global.nmalloc, GB_Global.malloc_debug) ; \ (p) = NULL ; \ } @@ -234,7 +256,7 @@ void GB_mx_complex_split // split complex array to real/imag part for MATLAB #define AS_IF_FREE(p) \ { \ - GB_thread_local.nmalloc-- ; \ + GB_Global.nmalloc-- ; \ (p) = NULL ; \ } @@ -244,7 +266,7 @@ void GB_mx_complex_split // split complex array to real/imag part for MATLAB #define METHOD_START(OP) \ printf ("\n================================================================================\n") ; \ - printf ("method: [%s] start: %d\n", #OP, GB_thread_local.nmalloc) ; \ + printf ("method: [%s] start: %d\n", #OP, GB_Global.nmalloc) ; \ printf ("================================================================================\n") ; #define METHOD_TRY \ @@ -280,7 +302,7 @@ void GB_mx_complex_split // split complex array to real/imag part for MATLAB else \ { \ /* brutal malloc debug */ \ - int nmalloc_start = (int) GB_thread_local.nmalloc \ + int nmalloc_start = (int) GB_Global.nmalloc \ - ((GB_thread_local.Mark == NULL) ? 0:1 ) \ - ((GB_thread_local.Work == NULL) ? 0:1 ) \ - ((GB_thread_local.Flag == NULL) ? 
0:1 ) ; \ @@ -288,13 +310,13 @@ void GB_mx_complex_split // split complex array to real/imag part for MATLAB { \ /* give GraphBLAS the ability to do a # of mallocs, */ \ /* callocs, and reallocs of larger size, equal to tries */ \ - GB_thread_local.malloc_debug_count = tries ; \ + GB_Global.malloc_debug_count = tries ; \ METHOD_TRY ; \ /* call the method with malloc debug enabled */ \ - GB_thread_local.malloc_debug = true ; \ + GB_Global.malloc_debug = true ; \ GB_thread_local.info = GrB_SUCCESS ; \ GrB_Info info = GRAPHBLAS_OPERATION ; \ - GB_thread_local.malloc_debug = false ; \ + GB_Global.malloc_debug = false ; \ if (info == GrB_SUCCESS || info == GrB_NO_VALUE) \ { \ /* finally gave GraphBLAS enough malloc's to do the work */ \ @@ -308,7 +330,7 @@ void GB_mx_complex_split // split complex array to real/imag part for MATLAB /* but turn off malloc debugging to get the copy */ \ FREE_DEEP_COPY ; \ GET_DEEP_COPY ; \ - int nmalloc_end = (int) GB_thread_local.nmalloc \ + int nmalloc_end = (int) GB_Global.nmalloc \ - ((GB_thread_local.Mark == NULL) ? 0:1 ) \ - ((GB_thread_local.Work == NULL) ? 0:1 ) \ - ((GB_thread_local.Flag == NULL) ? 0:1 ) ; \ diff --git a/GraphBLAS/Test/GB_mex_Col_assign.c b/GraphBLAS/Test/GB_mex_Col_assign.c index f774ac7d13..3605bba940 100644 --- a/GraphBLAS/Test/GB_mex_Col_assign.c +++ b/GraphBLAS/Test/GB_mex_Col_assign.c @@ -270,8 +270,8 @@ GrB_Info many_assign // if (k == CATCH) GB_check (C, "C start", 3) ; // [ turn off malloc debugging - bool save = GB_thread_local.malloc_debug ; - GB_thread_local.malloc_debug = false ; + bool save = GB_Global.malloc_debug ; + GB_Global.malloc_debug = false ; // get Mask (shallow copy) Mask = NULL ; @@ -365,7 +365,7 @@ GrB_Info many_assign } } // restore malloc debugging to test the method - GB_thread_local.malloc_debug = save ; // ] + GB_Global.malloc_debug = save ; // ] // GB_check (desc, "desc", 3) ; diff --git a/GraphBLAS/Test/GB_mex_Matrix_extractElement.c b/GraphBLAS/Test/GB_mex_Matrix_extractElement.c index 92b014b761..ed1fcb979f 100644 --- a/GraphBLAS/Test/GB_mex_Matrix_extractElement.c +++ b/GraphBLAS/Test/GB_mex_Matrix_extractElement.c @@ -17,7 +17,7 @@ #define FREE_ALL \ { \ GB_MATRIX_FREE (&A) ; \ - GB_FREE_MEMORY (Xtemp) ; \ + GB_FREE_MEMORY (Xtemp, ni, sizeof (double complex)) ; \ GB_mx_put_global (malloc_debug) ; \ } @@ -34,6 +34,10 @@ void mexFunction GrB_Matrix A = NULL ; void *Y = NULL ; void *Xtemp = NULL ; + GrB_Index *I = NULL, ni = 0 ; + GrB_Index *J = NULL, nj = 0 ; + mxClassID xclass ; + GrB_Type xtype ; // check inputs if (nargout > 1 || nargin < 3 || nargin > 4) @@ -55,7 +59,6 @@ void mexFunction mxClassID aclass = GB_mx_Type_to_classID (A->type) ; // get I - GrB_Index *I, ni ; if (!GB_mx_mxArray_to_indices (&I, pargin [1], &ni)) { FREE_ALL ; @@ -63,7 +66,6 @@ void mexFunction } // get J - GrB_Index *J, nj ; if (!GB_mx_mxArray_to_indices (&J, pargin [2], &nj)) { FREE_ALL ; @@ -77,8 +79,6 @@ void mexFunction } // get xclass, default is class (A), and the corresponding xtype - mxClassID xclass ; - GrB_Type xtype ; if (A->type == Complex) { diff --git a/GraphBLAS/Test/GB_mex_Vector_extractElement.c b/GraphBLAS/Test/GB_mex_Vector_extractElement.c index acd7f2123a..338ece6c59 100644 --- a/GraphBLAS/Test/GB_mex_Vector_extractElement.c +++ b/GraphBLAS/Test/GB_mex_Vector_extractElement.c @@ -12,7 +12,7 @@ #define FREE_ALL \ { \ GrB_free (&v) ; \ - GB_FREE_MEMORY (Xtemp) ; \ + GB_FREE_MEMORY (Xtemp, ni, sizeof (double complex)) ; \ GB_mx_put_global (malloc_debug) ; \ } @@ -29,6 +29,9 @@ void mexFunction GrB_Vector 
v = NULL ; void *Y = NULL ; void *Xtemp = NULL ; + GrB_Index *I = NULL, ni = 0 ; + mxClassID xclass ; + GrB_Type xtype ; // check inputs if (nargout > 1 || nargin < 2 || nargin > 3) @@ -50,18 +53,13 @@ void mexFunction mxClassID aclass = GB_mx_Type_to_classID (v->type) ; // get I - GrB_Index *I, ni ; if (!GB_mx_mxArray_to_indices (&I, pargin [1], &ni)) { FREE_ALL ; mexErrMsgTxt ("I failed") ; } - // get xclass, default is class (A), and the corresponding xtype - mxClassID xclass ; - GrB_Type xtype ; - if (v->type == Complex) { // input argument xclass is ignored diff --git a/GraphBLAS/Test/GB_mex_assign.c b/GraphBLAS/Test/GB_mex_assign.c index 86a7cbb609..30e7ce19e8 100644 --- a/GraphBLAS/Test/GB_mex_assign.c +++ b/GraphBLAS/Test/GB_mex_assign.c @@ -286,8 +286,8 @@ GrB_Info many_assign mxArray *p ; // [ turn off malloc debugging - bool save = GB_thread_local.malloc_debug ; - GB_thread_local.malloc_debug = false ; + bool save = GB_Global.malloc_debug ; + GB_Global.malloc_debug = false ; // get Mask (shallow copy) Mask = NULL ; @@ -367,7 +367,7 @@ GrB_Info many_assign } // restore malloc debugging to test the method - GB_thread_local.malloc_debug = save ; // ] + GB_Global.malloc_debug = save ; // ] //---------------------------------------------------------------------- // C(I,J) = A diff --git a/GraphBLAS/Test/GB_mex_assign_alias.c b/GraphBLAS/Test/GB_mex_assign_alias.c new file mode 100644 index 0000000000..1ab5b76b33 --- /dev/null +++ b/GraphBLAS/Test/GB_mex_assign_alias.c @@ -0,0 +1,96 @@ +//------------------------------------------------------------------------------ +// GB_mex_assign_alias: C(I,J) = accum(C(I,J),C) +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
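
A minimal standalone sketch of the aliasing pattern that GB_mex_assign_alias (and test69) exercise, C(:,:) = accum(C(:,:),C) with the assigned matrix aliased to the target. This is an illustration only, not part of the patch: the dimensions, values, and the plus accumulator are placeholders; only the GrB_assign form already used in this file is assumed.

#include "GraphBLAS.h"

// hypothetical driver: C(:,:) = C(:,:) + C, with C used as both the output
// and the assigned matrix; GraphBLAS must give the same result as if the
// input were an independent copy of C
int main (void)
{
    GrB_init (GrB_NONBLOCKING) ;
    GrB_Index n = 4 ;
    GrB_Matrix C = NULL ;
    GrB_Matrix_new (&C, GrB_FP64, n, n) ;
    GrB_Matrix_setElement_FP64 (C, 3.0, 0, 0) ;
    // input aliased with output, as in test69
    GrB_assign (C, NULL, GrB_PLUS_FP64, C, GrB_ALL, n, GrB_ALL, n, NULL) ;
    GrB_free (&C) ;
    GrB_finalize ( ) ;
    return (0) ;
}
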
+ +//------------------------------------------------------------------------------ + +#include "GB_mex.h" + +#define FREE_ALL \ +{ \ + GB_MATRIX_FREE (&C) ; \ + GrB_free (&desc) ; \ + GB_mx_put_global (malloc_debug) ; \ +} + +void mexFunction +( + int nargout, + mxArray *pargout [ ], + int nargin, + const mxArray *pargin [ ] +) +{ + + bool malloc_debug = GB_mx_get_global ( ) ; + GrB_Matrix C = NULL ; + GrB_Descriptor desc = NULL ; + + // check inputs + if (nargout > 1 || nargin < 2 || nargin > 5) + { + mexErrMsgTxt ("Usage: C = GB_mex_assign_alias (C, accum, I, J, desc)"); + } + + // get C (make a deep copy) + #define GET_DEEP_COPY \ + { \ + C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true) ; \ + } + #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ; + GET_DEEP_COPY ; + if (C == NULL) + { + FREE_ALL ; + mexErrMsgTxt ("C failed") ; + } + mxClassID cclass = GB_mx_Type_to_classID (C->type) ; + + // get accum; default: NOP, default class is class(C) + GrB_BinaryOp accum ; + if (!GB_mx_mxArray_to_BinaryOp (&accum, pargin [1], "accum", + GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex)) + { + FREE_ALL ; + mexErrMsgTxt ("accum failed") ; + } + + GrB_Index *I, *J, ni, nj ; + + // get I + if (!GB_mx_mxArray_to_indices (&I, PARGIN (2), &ni)) + { + FREE_ALL ; + mexErrMsgTxt ("I failed") ; + } + + // get J + if (!GB_mx_mxArray_to_indices (&J, PARGIN (3), &nj)) + { + FREE_ALL ; + mexErrMsgTxt ("J failed") ; + } + + // get desc + if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (4), "desc")) + { + FREE_ALL ; + mexErrMsgTxt ("desc failed") ; + } + + GrB_Index nrows, ncols ; + GrB_Matrix_nvals (&nrows, C) ; + GrB_Matrix_nvals (&ncols, C) ; + + // C(I,J) = accum (C(I,J),C) + METHOD (GrB_assign (C, NULL, accum, C, I, ni, J, nj, desc)) ; + + // return C to MATLAB as a struct and free the GraphBLAS C + pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ; + + FREE_ALL ; +} + diff --git a/GraphBLAS/Test/GB_mex_cast.c b/GraphBLAS/Test/GB_mex_cast.c index 401d190058..9af7b240ab 100644 --- a/GraphBLAS/Test/GB_mex_cast.c +++ b/GraphBLAS/Test/GB_mex_cast.c @@ -56,7 +56,7 @@ void mexFunction mxCOMPLEX) ; GB_mx_complex_split (nrows*ncols, X, pargout [0]) ; // X is a deep copy that must be freed - GB_FREE_MEMORY (X) ; + GB_FREE_MEMORY (X, nrows*ncols, sizeof (double complex)) ; } else { diff --git a/GraphBLAS/Test/GB_mex_errors.c b/GraphBLAS/Test/GB_mex_errors.c index a1ef5d93c2..2cbf37b994 100644 --- a/GraphBLAS/Test/GB_mex_errors.c +++ b/GraphBLAS/Test/GB_mex_errors.c @@ -136,9 +136,14 @@ void mexFunction fprintf (f,"========================================================\n") ; fprintf (f,"many errors are expected\n") ; - printf ("nmalloc %d at start\n", GB_thread_local.nmalloc) ; + GxB_Statistics stats ; + int64_t nmalloc ; + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + + printf ("nmalloc %d at start\n", nmalloc) ; bool malloc_debug = GB_mx_get_global ( ) ; - printf ("nmalloc %d after complex init\n", GB_thread_local.nmalloc) ; + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + printf ("nmalloc %d after complex init\n", nmalloc) ; GrB_Matrix A = NULL, B = NULL, C = NULL, Z = NULL, Agunk = NULL, Aempty = NULL, E = NULL, F = NULL, A0 = NULL, H = NULL, @@ -156,7 +161,7 @@ void mexFunction GrB_Descriptor desc = NULL, dgunk = NULL, d0 = NULL, dnt = NULL, dtn = NULL, dtt = NULL, descb = NULL ; GrB_Desc_Value dval ; - GrB_Index n = 0, nvals = 0, n2 = 0, i = 0, j = 0, a, b ; + GrB_Index n = 0, nvals = 0, n2 = 0, i = 0, j = 0, a, b, uvals = 0 ; GrB_Index *I0 = NULL, *J0 = NULL ; #define LEN 100 
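
The GB_mex_errors.c hunks above replace direct reads of GB_thread_local.nmalloc with a query through GxB_stats, so the test relies only on the statistics interface. A small sketch of that query pattern, using only the names visible in the hunks (GxB_Statistics, GxB_stats, stats.nmalloc); anything else here is illustrative:

#include <stdint.h>
#include "GraphBLAS.h"

// illustration only: read the count of outstanding mallocs via GxB_stats,
// as the revised tests do, instead of touching GraphBLAS internals
static int64_t get_nmalloc (void)
{
    GxB_Statistics stats ;
    GxB_stats (&stats) ;        // returns GrB_NULL_POINTER if stats is NULL
    return (stats.nmalloc) ;    // mallocs not yet freed
}
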
GrB_Index I [5] = { 0, 7, 8, 3, 2 }, I2 [LEN] ; @@ -425,8 +430,6 @@ void mexFunction // SelectOp //-------------------------------------------------------------------------- - // int nmalloc1 = GB_thread_local.nmalloc ; - CHECK (selectop == NULL) ; OK (GxB_SelectOp_new (&selectop, fselect, GrB_FP64)) ; OK (GxB_SelectOp_free (&selectop)) ; @@ -456,8 +459,6 @@ void mexFunction OK (GxB_SelectOp_free (&selectop)) ; CHECK (selectop == NULL) ; - // int nmalloc2 = GB_thread_local.nmalloc ; - //-------------------------------------------------------------------------- // Monoid //-------------------------------------------------------------------------- @@ -520,7 +521,7 @@ void mexFunction ERR (GrB_Monoid_new (&monoid, GrB_EQ_FP64, (double) 0)) ; CHECK (monoid == NULL) ; - // These feel like they should work, but '0' becomes int, and it doesn't + // These feel like they should work, but '0' becomes int, and it does not // match the type of the operator. So it is expected to fail with a // domain mismatch. ERR (GrB_Monoid_new (&monoid, GrB_PLUS_FP64, 0)) ; @@ -2279,6 +2280,27 @@ void mexFunction ERR (GxB_subassign (A, NULL, NULL, A, I, 2, J, 3, dtn)) ; ERR (GxB_subassign (A , v , NULL, v , 0, J, 0, NULL)) ; + // for (int k = 0 ; k < 3 ; k++) printf ("I [%d] = %lld\n", k, I [k]) ; + // for (int k = 0 ; k < 2 ; k++) printf ("J [%d] = %lld\n", k, J [k]) ; + // GB_check (A, "Aok", 3) ; + expected = GrB_INDEX_OUT_OF_BOUNDS ; + ERR (GxB_subassign (A, NULL, GrB_PLUS_FP64, C, I, 3, J, 2, NULL)) ; + + // GB_check (A, "Aok1", 3) ; + // GB_check (C, "Cok", 3) ; + + GrB_Index I3 [5] = { 0, 1, 2, 3, 4 } ; + GrB_Index J3 [5] = { 0, 1, 2, 3, 4 } ; + + OK (GxB_subassign (A, NULL, GrB_PLUS_FP64, C, I3, 3, J3, 2, NULL)) ; + // GB_check (A, "Aok2", 3) ; + + OK (GxB_subassign (C, C, GrB_PLUS_FP64, C, I3, 3, J3, 2, NULL)) ; + + J3 [0] = 999 ; + ERR (GxB_subassign (C, C, GrB_PLUS_FP64, C, I3, 3, J3, 2, NULL)) ; + ERR (GxB_subassign (A, NULL, GrB_PLUS_FP64, x_double, I3, 1, J3, 1, NULL)) ; + //-------------------------------------------------------------------------- // assign //-------------------------------------------------------------------------- @@ -2663,8 +2685,6 @@ void mexFunction // select //-------------------------------------------------------------------------- - // int nmalloc3 = GB_thread_local.nmalloc ; - CHECK (selectop == NULL) ; OK (GxB_SelectOp_new (&selectop, fselect, GrB_FP64)) ; CHECK (selectop != NULL) ; @@ -2734,8 +2754,6 @@ void mexFunction ERR (GxB_select (A , NULL, NULL, GxB_TRIL, C , &thresh, d0)) ; - // int nmalloc4 = GB_thread_local.nmalloc ; - //-------------------------------------------------------------------------- // reduce to scalar //-------------------------------------------------------------------------- @@ -3233,8 +3251,6 @@ void mexFunction // SelectOp check //-------------------------------------------------------------------------- - // int nmalloc5 = GB_thread_local.nmalloc ; - printf ("\n-------------- GB_SelectOp_check:\n") ; WHERE ("GB_SelectOp_check") ; @@ -3273,8 +3289,6 @@ void mexFunction printf ("\nAll GB_SelectOp_check tests passed (errors expected)\n") ; - // int nmalloc6 = GB_thread_local.nmalloc ; - //-------------------------------------------------------------------------- // Monoid check //-------------------------------------------------------------------------- @@ -3603,7 +3617,6 @@ void mexFunction A->i [0] = 1 ; A->i [1] = 0 ; - printf ("GB_mex_errors, line %d:", __LINE__) ; info = GB_Matrix_check (A, "jumbled", 3) ; printf ("jumbled info %d\n", info) ; 
CHECK (info == GrB_INDEX_OUT_OF_BOUNDS) ; @@ -3841,6 +3854,296 @@ void mexFunction OK (GB_op_is_second (GrB_SECOND_FP64, NULL)) ; + //-------------------------------------------------------------------------- + // check for inputs aliased with outputs + //-------------------------------------------------------------------------- + + Complex_finalize ( ) ; + + GrB_free (&A) ; + GrB_free (&B) ; + GrB_free (&C) ; + GrB_free (&E) ; + GrB_free (&F) ; + GrB_free (&v) ; + GrB_free (&u) ; + GrB_free (&z) ; + + #define NWHAT 12 + n = NWHAT ; + nvals = 40 ; + uvals = 4 ; + GrB_Index ilist [NWHAT] = { 8, 9, 0, 1, 5, 6, 11, 3, 2, 10, 7, 4 } ; + GrB_Index jlist [NWHAT] = { 0, 11, 1, 7, 8, 4, 2, 3, 5, 6, 10, 9 } ; + + OK (random_matrix (&A, false, false, n, n, nvals, 0, false)) ; + OK (GrB_Vector_new (&u, GrB_FP64, n)) ; + OK (GrB_Vector_setElement (u, (double) 3.4, 0)) ; + + E = A ; + GrB_Matrix_dup (&A, A) ; + CHECK (GB_mx_isequal (A,E)) ; + GrB_free (&E) ; + + z = u ; + GrB_Vector_dup (&u, u) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) z)) ; + GrB_free (&z) ; + + for (int what = 0 ; what <= 2 ; what++) + { + + GrB_Matrix Amask ; + GrB_Vector umask ; + switch (what) + { + case 0: Amask = NULL ; umask = NULL ; break ; + case 1: Amask = A ; umask = u ; break ; + case 2: + OK (random_matrix (&Amask, false, false, n, n, nvals, 0, false)) ; + OK (random_matrix (&F, false, false, n, 1, uvals, 0, false)) ; + umask = (GrB_Vector) F ; + F = NULL ; + break ; + } + + //---------------------------------------------------------------------- + // GrB_mxm, GrB_vxm, and GrB_mxv + //---------------------------------------------------------------------- + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_mxm (B, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ; + OK (GrB_mxm (A, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_vxm (v, umask, NULL, GxB_PLUS_TIMES_FP64, u, A, NULL)) ; + OK (GrB_vxm (u, umask, NULL, GxB_PLUS_TIMES_FP64, u, A, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_mxv (v, umask, NULL, GxB_PLUS_TIMES_FP64, A, u, NULL)) ; + OK (GrB_mxv (u, umask, NULL, GxB_PLUS_TIMES_FP64, A, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + //---------------------------------------------------------------------- + // GrB_eWiseMult + //---------------------------------------------------------------------- + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_eWiseMult (v, umask, NULL, GxB_PLUS_TIMES_FP64, u, u, NULL)) ; + OK (GrB_eWiseMult (u, umask, NULL, GxB_PLUS_TIMES_FP64, u, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_eWiseMult (v, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ; + OK (GrB_eWiseMult (u, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_eWiseMult (v, umask, NULL, GrB_PLUS_FP64, u, u, NULL)) ; + OK (GrB_eWiseMult (u, umask, NULL, GrB_PLUS_FP64, u, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_eWiseMult (B, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ; + OK (GrB_eWiseMult (A, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK 
(GrB_Matrix_dup (&B, A)) ; + OK (GrB_eWiseMult (B, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ; + OK (GrB_eWiseMult (A, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_eWiseMult (B, Amask, NULL, GrB_PLUS_FP64, A, A, NULL)) ; + OK (GrB_eWiseMult (A, Amask, NULL, GrB_PLUS_FP64, A, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + //---------------------------------------------------------------------- + // GrB_eWiseAdd + //---------------------------------------------------------------------- + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_eWiseAdd (v, umask, NULL, GxB_PLUS_TIMES_FP64, u, u, NULL)) ; + OK (GrB_eWiseAdd (u, umask, NULL, GxB_PLUS_TIMES_FP64, u, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_eWiseAdd (v, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ; + OK (GrB_eWiseAdd (u, umask, NULL, GxB_PLUS_FP64_MONOID, u, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_eWiseAdd (v, umask, NULL, GrB_PLUS_FP64, u, u, NULL)) ; + OK (GrB_eWiseAdd (u, umask, NULL, GrB_PLUS_FP64, u, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_eWiseAdd (B, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ; + OK (GrB_eWiseAdd (A, Amask, NULL, GxB_PLUS_TIMES_FP64, A, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_eWiseAdd (B, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ; + OK (GrB_eWiseAdd (A, Amask, NULL, GxB_PLUS_FP64_MONOID, A, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_eWiseAdd (B, Amask, NULL, GrB_PLUS_FP64, A, A, NULL)) ; + OK (GrB_eWiseAdd (A, Amask, NULL, GrB_PLUS_FP64, A, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + //---------------------------------------------------------------------- + // GrB_extract + //---------------------------------------------------------------------- + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_extract (u, umask, NULL, u, GrB_ALL, n, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_extract (B, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ; + OK (GrB_extract (A, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_extract (v, umask, NULL, A, GrB_ALL, n, 0, NULL)) ; + OK (GrB_extract (u, umask, NULL, A, GrB_ALL, n, 0, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + //---------------------------------------------------------------------- + // GxB_subassign + //---------------------------------------------------------------------- + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GxB_subassign (B, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ; + OK (GxB_subassign (A, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ; + GB_wait (B) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GxB_subassign (B, Amask, NULL, A, ilist, n, jlist, n, NULL)) ; + OK (GxB_subassign (A, Amask, NULL, A, ilist, n, jlist, n, NULL)) ; + GB_wait (B) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GxB_subassign (v, 
umask, NULL, u, GrB_ALL, n, NULL)) ; + OK (GxB_subassign (u, umask, NULL, u, GrB_ALL, n, NULL)) ; + GB_wait ((GrB_Matrix) v) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GxB_subassign (v, umask, NULL, u, ilist, n, NULL)) ; + OK (GxB_subassign (u, umask, NULL, u, ilist, n, NULL)) ; + GB_wait ((GrB_Matrix) v) ; + GB_wait ((GrB_Matrix) u) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + //---------------------------------------------------------------------- + // GrB_assign + //---------------------------------------------------------------------- + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_assign (B, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ; + OK (GrB_assign (A, Amask, NULL, A, GrB_ALL, n, GrB_ALL, n, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_assign (B, Amask, NULL, A, ilist, n, jlist, n, NULL)) ; + OK (GrB_assign (A, Amask, NULL, A, ilist, n, jlist, n, NULL)) ; + GB_wait (B) ; + GB_wait (A) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_assign (v, umask, NULL, u, GrB_ALL, n, NULL)) ; + OK (GrB_assign (u, umask, NULL, u, GrB_ALL, n, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_assign (v, umask, NULL, u, ilist, n, NULL)) ; + OK (GrB_assign (u, umask, NULL, u, ilist, n, NULL)) ; + GB_wait ((GrB_Matrix) v) ; + GB_wait ((GrB_Matrix) u) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + //---------------------------------------------------------------------- + // GrB_apply + //---------------------------------------------------------------------- + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_apply (B, Amask, NULL, GrB_AINV_FP64, A, NULL)) ; + OK (GrB_apply (A, Amask, NULL, GrB_AINV_FP64, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GrB_apply (v, umask, NULL, GrB_AINV_FP64, u, NULL)) ; + OK (GrB_apply (u, umask, NULL, GrB_AINV_FP64, u, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + //---------------------------------------------------------------------- + // GxB_select + //---------------------------------------------------------------------- + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GxB_select (B, Amask, NULL, GxB_NONZERO, A, NULL, NULL)) ; + OK (GxB_select (A, Amask, NULL, GxB_NONZERO, A, NULL, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + OK (GrB_Vector_dup (&v, u)) ; + OK (GxB_select (v, umask, NULL, GxB_NONZERO, u, NULL, NULL)) ; + OK (GxB_select (u, umask, NULL, GxB_NONZERO, u, NULL, NULL)) ; + CHECK (GB_mx_isequal ((GrB_Matrix) u, (GrB_Matrix) v)) ; + GrB_free (&v) ; + + //---------------------------------------------------------------------- + // GrB_transepose + //---------------------------------------------------------------------- + + OK (GrB_Matrix_dup (&B, A)) ; + OK (GrB_transpose (B, Amask, NULL, A, NULL)) ; + OK (GrB_transpose (A, Amask, NULL, A, NULL)) ; + CHECK (GB_mx_isequal (A,B)) ; + GrB_free (&B) ; + + if (what == 2) + { + GrB_free (&Amask) ; + GrB_free (&umask) ; + } + } + //-------------------------------------------------------------------------- // free all //-------------------------------------------------------------------------- @@ -3848,12 +4151,15 @@ void mexFunction // this is also done by FREE_ALL, but the list here 
is meant to be // accurate, so nmalloc should be zero at the check below - printf ("nmalloc %d\n", GB_thread_local.nmalloc) ; + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + printf ("nmalloc %d\n", nmalloc) ; - // int nmalloc7 = GB_thread_local.nmalloc ; + expected = GrB_NULL_POINTER ; + ERR (GxB_stats (NULL)) ; GrB_free (&Empty1) ; CHECK (Empty1 == NULL) ; GrB_free (&v) ; CHECK (v == NULL) ; + GrB_free (&u) ; CHECK (u == NULL) ; GrB_free (&A) ; CHECK (A == NULL) ; GrB_free (&u) ; CHECK (u == NULL) ; GrB_free (&z) ; CHECK (z == NULL) ; @@ -3887,30 +4193,25 @@ void mexFunction GrB_free (&dnt) ; CHECK (dnt == NULL) ; GrB_free (&dtt) ; CHECK (dtt == NULL) ; GrB_free (&dgunk) ; CHECK (dgunk == NULL) ; - // int nmalloc8 = GB_thread_local.nmalloc ; GrB_free (&selectop) ; CHECK (selectop == NULL) ; GrB_free (&selectopgunk) ; CHECK (selectopgunk == NULL) ; - // int nmalloc9 = GB_thread_local.nmalloc ; - - /* - printf ("nmalloc1 %d\n", nmalloc1) ; - printf ("nmalloc2 %d\n", nmalloc2) ; - printf ("nmalloc3 %d\n", nmalloc3) ; - printf ("nmalloc4 %d\n", nmalloc4) ; - printf ("nmalloc5 %d\n", nmalloc5) ; - printf ("nmalloc6 %d\n", nmalloc6) ; - printf ("nmalloc7 %d\n", nmalloc7) ; - printf ("nmalloc8 %d\n", nmalloc8) ; - printf ("nmalloc9 %d\n", nmalloc9) ; - */ - - printf ("nmalloc %d before complex_finalize\n", GB_thread_local.nmalloc) ; + + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + printf ("nmalloc %d before complex_finalize\n", nmalloc) ; Complex_finalize ( ) ; - printf ("nmalloc %d done\n", GB_thread_local.nmalloc) ; + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + printf ("nmalloc %d done\n", nmalloc) ; + GrB_finalize ( ) ; + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + printf ("nmalloc %d all freed\n", nmalloc) ; FREE_ALL ; + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + printf ("nmalloc %d all freed\n", nmalloc) ; GrB_finalize ( ) ; - CHECK (GB_thread_local.nmalloc == 0) ; + GxB_stats (&stats) ; nmalloc = stats.nmalloc ; + printf ("nmalloc %d after finalize\n", nmalloc) ; + CHECK (nmalloc == 0) ; printf ("\ncheck errlog.txt for errors tested\n") ; printf ("All error-handling tests passed" diff --git a/GraphBLAS/Test/GB_mex_extractTuples.c b/GraphBLAS/Test/GB_mex_extractTuples.c index e8f62a5871..6e6da5716d 100644 --- a/GraphBLAS/Test/GB_mex_extractTuples.c +++ b/GraphBLAS/Test/GB_mex_extractTuples.c @@ -12,7 +12,7 @@ #define FREE_ALL \ { \ GB_MATRIX_FREE (&A) ; \ - GB_FREE_MEMORY (Xtemp) ; \ + GB_FREE_MEMORY (Xtemp, nvals, sizeof (double complex)) ; \ GB_mx_put_global (malloc_debug) ; \ } @@ -30,6 +30,7 @@ void mexFunction void *Y = NULL ; void *Xtemp = NULL ; void *X = NULL ; + GrB_Index nvals = 0 ; // check inputs if (nargout > 3 || nargin < 1 || nargin > 2) @@ -50,7 +51,6 @@ void mexFunction mxClassID aclass = GB_mx_Type_to_classID (A->type) ; // get the number of entries in A - GrB_Index nvals ; GrB_Matrix_nvals (&nvals, A) ; mxClassID xclass ; diff --git a/GraphBLAS/Test/GB_mex_mxm_alias.c b/GraphBLAS/Test/GB_mex_mxm_alias.c new file mode 100644 index 0000000000..5414968c53 --- /dev/null +++ b/GraphBLAS/Test/GB_mex_mxm_alias.c @@ -0,0 +1,100 @@ +//------------------------------------------------------------------------------ +// GB_mex_mxm_alias: C = accum(C,C*C) +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
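
GB_mex_mxm_alias (below) drives GrB_mxm with every matrix argument aliased to C, the case test28 covers. A hedged sketch of that call pattern, modeled on the GxB_PLUS_TIMES_FP64 calls added to GB_mex_errors.c above; the dimensions and values are placeholders, not the shipped test:

#include "GraphBLAS.h"

// hypothetical illustration: C<C> = accum (C, C*C), with C used as the
// output, the mask, and both multiplicands at once (see test28)
int main (void)
{
    GrB_init (GrB_NONBLOCKING) ;
    GrB_Index n = 4 ;
    GrB_Matrix C = NULL ;
    GrB_Matrix_new (&C, GrB_FP64, n, n) ;
    GrB_Matrix_setElement_FP64 (C, 2.0, 0, 0) ;
    GrB_Matrix_setElement_FP64 (C, 5.0, 1, 1) ;
    // all three matrix arguments (mask, A, B) alias the output C
    GrB_mxm (C, C, GrB_PLUS_FP64, GxB_PLUS_TIMES_FP64, C, C, NULL) ;
    GrB_free (&C) ;
    GrB_finalize ( ) ;
    return (0) ;
}
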
+ +//------------------------------------------------------------------------------ + +#include "GB_mex.h" + +#define FREE_ALL \ +{ \ + GB_MATRIX_FREE (&C) ; \ + if (semiring != Complex_plus_times) \ + { \ + if (semiring != NULL) \ + { \ + GrB_free (&(semiring->add)) ; \ + } \ + GrB_free (&semiring) ; \ + } \ + GrB_free (&desc) ; \ + GB_mx_put_global (malloc_debug) ; \ +} + +void mexFunction +( + int nargout, + mxArray *pargout [ ], + int nargin, + const mxArray *pargin [ ] +) +{ + + bool malloc_debug = GB_mx_get_global ( ) ; + GrB_Matrix C = NULL ; + GrB_Semiring semiring = NULL ; + GrB_Descriptor desc = NULL ; + + // check inputs + if (nargout > 1 || nargin < 3 || nargin > 4) + { + mexErrMsgTxt ("Usage: C = GB_mex_mxm_alias (C, accum, semiring, desc)"); + } + + // get C (make a deep copy) + #define GET_DEEP_COPY \ + C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true) ; + #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ; + GET_DEEP_COPY ; + if (C == NULL) + { + FREE_ALL ; + mexErrMsgTxt ("C failed") ; + } + mxClassID cclass = GB_mx_Type_to_classID (C->type) ; + + // get semiring + if (C->type == Complex) + { + // semiring input argument is ignored and may be empty + semiring = Complex_plus_times ; + } + else + { + if (!GB_mx_mxArray_to_Semiring (&semiring, pargin [2], "semiring", + cclass)) + { + FREE_ALL ; + mexErrMsgTxt ("semiring failed") ; + } + } + + // get accum; default: NOP, default class is class(C) + GrB_BinaryOp accum ; + if (!GB_mx_mxArray_to_BinaryOp (&accum, pargin [1], "accum", + GB_NOP_opcode, cclass, C->type == Complex, + semiring->add->op->ztype == Complex)) + { + FREE_ALL ; + mexErrMsgTxt ("accum failed") ; + } + + // get desc + if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (3), "desc")) + { + FREE_ALL ; + mexErrMsgTxt ("desc failed") ; + } + + // C = accum(C,C*C) + METHOD (GrB_mxm (C, C, accum, semiring, C, C, desc)) ; + + // return C to MATLAB as a struct and free the GraphBLAS C + pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ; + + FREE_ALL ; +} + diff --git a/GraphBLAS/Test/GB_mex_op.c b/GraphBLAS/Test/GB_mex_op.c index 8eb1ad2845..eb643fd255 100644 --- a/GraphBLAS/Test/GB_mex_op.c +++ b/GraphBLAS/Test/GB_mex_op.c @@ -19,9 +19,9 @@ #define FREE_ALL \ { \ - if (op_ztype == Complex && Z != NULL) GB_FREE_MEMORY (Z) ; \ - if (X_type == Complex && X != NULL) GB_FREE_MEMORY (X) ; \ - if (Y_type == Complex && Y != NULL) GB_FREE_MEMORY (Y) ; \ + if (op_ztype == Complex) GB_FREE_MEMORY (Z, nx+1, sizeof (double complex));\ + if (X_type == Complex) GB_FREE_MEMORY (X, nx+1, sizeof (double complex));\ + if (Y_type == Complex) GB_FREE_MEMORY (Y, ny+1, sizeof (double complex));\ GB_mx_put_global (malloc_debug) ; \ } @@ -38,6 +38,8 @@ void mexFunction void *X = NULL, *Y = NULL, *Z = NULL ; GrB_Type X_type = NULL, Y_type = NULL ; + int64_t nrows = 0, ncols = 0, nx = 0, ny = 0, nrows2 = 0, ncols2 = 0 ; + size_t Y_size = 1 ; bool malloc_debug = GB_mx_get_global ( ) ; @@ -100,9 +102,9 @@ void mexFunction // get X //-------------------------------------------------------------------------- - int64_t nrows, ncols ; mxClassID xclass ; GB_mx_mxArray_to_array (pargin [1], &X, &nrows, &ncols, &xclass, &X_type) ; + nx = nrows * ncols ; if (X_type == NULL) { FREE_ALL ; @@ -117,19 +119,16 @@ void mexFunction mexErrMsgTxt ("op xtype not compatible with X") ; } - int64_t n = nrows * ncols ; - //-------------------------------------------------------------------------- // get Y //-------------------------------------------------------------------------- - size_t Y_size = 1 ; if 
(nargin > 2) { - int64_t nrows2, ncols2 ; mxClassID yclass ; GB_mx_mxArray_to_array (pargin [2], &Y, &nrows2, &ncols2, &yclass, &Y_type) ; + ny = nrows2 * ncols2 ; if (nrows2 != nrows || ncols2 != ncols) { FREE_ALL ; @@ -163,7 +162,7 @@ void mexFunction else if (op_ztype == Complex) { // Z is complex, create a temporary array - GB_MALLOC_MEMORY (Z, n + 1, sizeof (double complex)) ; + GB_MALLOC_MEMORY (Z, nx + 1, sizeof (double complex)) ; // Z must be copied into the MATLAB pargout [0] when done, then freed } else @@ -193,7 +192,7 @@ void mexFunction GB_binary_function f_binary = op2->function ; GB_cast_function cast_Y = GB_cast_factory (op_ytype->code,Y_type->code); - for (int64_t k = 0 ; k < n ; k++) + for (int64_t k = 0 ; k < nx ; k++) { cast_X (xwork, X +(k*X_size), X_size) ; cast_Y (ywork, Y +(k*Y_size), Y_size) ; @@ -205,7 +204,7 @@ void mexFunction { // Z = f (X) GB_unary_function f_unary = op1->function ; - for (int64_t k = 0 ; k < n ; k++) + for (int64_t k = 0 ; k < nx ; k++) { cast_X (xwork, X +(k*X_size), X_size) ; f_unary (Z +(k*op_zsize), xwork) ; @@ -220,7 +219,7 @@ void mexFunction { pargout [0] = mxCreateNumericMatrix (nrows, ncols, mxDOUBLE_CLASS, mxCOMPLEX) ; - GB_mx_complex_split (n, Z, pargout [0]) ; + GB_mx_complex_split (nx, Z, pargout [0]) ; } //-------------------------------------------------------------------------- diff --git a/GraphBLAS/Test/GB_mex_reduce_to_scalar.c b/GraphBLAS/Test/GB_mex_reduce_to_scalar.c index cfac1a8300..1eace190f5 100644 --- a/GraphBLAS/Test/GB_mex_reduce_to_scalar.c +++ b/GraphBLAS/Test/GB_mex_reduce_to_scalar.c @@ -20,7 +20,7 @@ } \ if (ctype == Complex) \ { \ - GB_FREE_MEMORY (c) ; \ + GB_FREE_MEMORY (c, 1, sizeof (double complex)) ; \ } \ GB_mx_put_global (malloc_debug) ; \ } @@ -73,6 +73,7 @@ void mexFunction FREE_ALL ; mexErrMsgTxt ("A failed") ; } + // GB_check (A, "A to reduce", 3) ; // get reduce; default: NOP, default class is class(C) GrB_BinaryOp reduceop ; diff --git a/GraphBLAS/Test/GB_mex_setElement.c b/GraphBLAS/Test/GB_mex_setElement.c index 5c2df10acd..3e626cfba3 100644 --- a/GraphBLAS/Test/GB_mex_setElement.c +++ b/GraphBLAS/Test/GB_mex_setElement.c @@ -17,7 +17,7 @@ #define FREE_ALL \ { \ GB_MATRIX_FREE (&A) ; \ - GB_FREE_MEMORY (Xtemp) ; \ + GB_FREE_MEMORY (Xtemp, ni, sizeof (double complex)) ; \ GB_mx_put_global (malloc_debug) ; \ } @@ -105,6 +105,8 @@ void mexFunction void *Y ; GrB_Type xtype ; void *Xtemp = NULL ; + GrB_Index *I = NULL, ni = 0 ; + GrB_Index *J = NULL, nj = 0 ; // check inputs if (nargout > 1 || nargin != 4) @@ -125,7 +127,6 @@ void mexFunction mxClassID aclass = GB_mx_Type_to_classID (A->type) ; // get I - GrB_Index *I, ni ; if (!GB_mx_mxArray_to_indices (&I, pargin [1], &ni)) { FREE_ALL ; @@ -133,7 +134,6 @@ void mexFunction } // get J - GrB_Index *J, nj ; if (!GB_mx_mxArray_to_indices (&J, pargin [2], &nj)) { FREE_ALL ; diff --git a/GraphBLAS/Test/GB_mex_subassign.c b/GraphBLAS/Test/GB_mex_subassign.c index 8c28639e5d..eceb17b783 100644 --- a/GraphBLAS/Test/GB_mex_subassign.c +++ b/GraphBLAS/Test/GB_mex_subassign.c @@ -40,6 +40,8 @@ GB_MATRIX_FREE (&Mask) ; \ GB_MATRIX_FREE (&C) ; \ GrB_free (&desc) ; \ + GrB_free (&op) ; \ + if (!reduce_is_complex) GrB_free (&reduce) ; \ GB_mx_put_global (malloc_debug) ; \ } @@ -57,6 +59,9 @@ GrB_Index *I = NULL, ni ; GrB_Index *J = NULL, nj ; bool malloc_debug = false ; GrB_Info info = GrB_SUCCESS ; +GrB_Monoid reduce = NULL ; +GrB_BinaryOp op = NULL ; +bool reduce_is_complex = false ; 
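
The globals just added (reduce, op, reduce_is_complex) support the new optional outputs of GB_mex_subassign: after the assignment, C is reduced to a scalar so the MATLAB harness (test29 in particular, which exercises GrB_reduce on a matrix with zombies) can check the result cheaply. The style of that check is sketched below; only GxB_PLUS_FP64_MONOID and the GrB_reduce form used later in this file are assumed, the helper name is made up.

#include "GraphBLAS.h"

// illustration only: reduce the result matrix to a double scalar with the
// plus monoid, the same kind of checksum the extended test returns to MATLAB
static double checksum (GrB_Matrix C)
{
    double d = 0 ;
    GrB_reduce (&d, NULL, GxB_PLUS_FP64_MONOID, C, NULL) ;
    return (d) ;
}
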
//------------------------------------------------------------------------------ // assign: perform a single assignment @@ -271,8 +276,8 @@ GrB_Info many_subassign mxArray *p ; // [ turn off malloc debugging - bool save = GB_thread_local.malloc_debug ; - GB_thread_local.malloc_debug = false ; + bool save = GB_Global.malloc_debug ; + GB_Global.malloc_debug = false ; // get Mask (shallow copy) Mask = NULL ; @@ -338,7 +343,7 @@ GrB_Info many_subassign } } // restore malloc debugging to test the method - GB_thread_local.malloc_debug = save ; // ] + GB_Global.malloc_debug = save ; // ] // GB_check (desc, "desc", 3) ; @@ -382,13 +387,18 @@ void mexFunction C = NULL ; Mask = NULL ; desc = NULL ; + reduce_is_complex = false ; + op = NULL ; + reduce = NULL ; - if (nargout > 1 || ! (nargin == 2 || nargin == 6 || nargin == 7)) + if (!((nargout == 1 && (nargin == 2 || nargin == 6 || nargin == 7)) || + ((nargout == 2 || nargout == 3) && nargin == 8))) { - mexErrMsgTxt ("Usage: C = GB_mex_subassign " - "(C, Mask, accum, A, I, J, desc) or (C, Work)"); + mexErrMsgTxt ("Usage: [C,s,t] = GB_mex_subassign " + "(C, Mask, accum, A, I, J, desc, reduce) or (C, Work)"); } + //-------------------------------------------------------------------------- // get C (make a deep copy) //-------------------------------------------------------------------------- @@ -494,9 +504,98 @@ void mexFunction mexErrMsgTxt ("desc failed") ; } + if (nargin == 8 && (nargout == 2 || nargout == 3)) + { + // get reduce operator + if (!GB_mx_mxArray_to_BinaryOp (&op, PARGIN (7), "op", + GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex)) + { + FREE_ALL ; + mexErrMsgTxt ("op failed") ; + } + + // get the reduce monoid + if (op == Complex_plus) + { + reduce_is_complex = true ; + reduce = Complex_plus_monoid ; + } + else if (op == Complex_times) + { + reduce_is_complex = true ; + reduce = Complex_times_monoid ; + } + else + { + // create the reduce monoid + if (!GB_mx_Monoid (&reduce, op, malloc_debug)) + { + FREE_ALL ; + mexErrMsgTxt ("reduce failed") ; + } + } + } + // C(I,J) = A METHOD (assign ( )) ; + + // apply the reduce monoid + if (nargin == 8 && (nargout == 2 || nargout == 3)) + { + // if (C->nzombies > 0) + // printf ("do the reduce thing, zombies %lld\n", C->nzombies) ; + // GB_check (C, "C to reduce", 1) ; + + #define REDUCE(type) \ + { \ + type c = 0 ; \ + GrB_reduce (&c, NULL, reduce, C, NULL) ; \ + pargout [1] = mxCreateNumericMatrix (1, 1, cclass, mxREAL) ; \ + void *p = mxGetData (pargout [1]) ; \ + memcpy (p, &c, sizeof (type)) ; \ + double d = 0 ; \ + GrB_reduce (&d, NULL, GxB_PLUS_FP64_MONOID, C, NULL) ; \ + if (nargout > 2) pargout [2] = mxCreateDoubleScalar (d) ; \ + } \ + break ; + + if (reduce_is_complex) + { + double c [2] = {0, 0} ; + GrB_reduce ((void *) c, NULL, reduce, C, NULL) ; + pargout [1] = mxCreateNumericMatrix (1, 1, + mxDOUBLE_CLASS, mxCOMPLEX) ; + GB_mx_complex_split (1, c, pargout [1]) ; + } + else + { + switch (cclass) + { + + case mxLOGICAL_CLASS : REDUCE (bool) ; + case mxINT8_CLASS : REDUCE (int8_t) ; + case mxUINT8_CLASS : REDUCE (uint8_t) ; + case mxINT16_CLASS : REDUCE (int16_t) ; + case mxUINT16_CLASS : REDUCE (uint16_t) ; + case mxINT32_CLASS : REDUCE (int32_t) ; + case mxUINT32_CLASS : REDUCE (uint32_t) ; + case mxINT64_CLASS : REDUCE (int64_t) ; + case mxUINT64_CLASS : REDUCE (uint64_t) ; + case mxSINGLE_CLASS : REDUCE (float) ; + case mxDOUBLE_CLASS : REDUCE (double) ; + + case mxCELL_CLASS : + case mxCHAR_CLASS : + case mxUNKNOWN_CLASS : + case mxFUNCTION_CLASS: + case 
mxSTRUCT_CLASS  :
+            default              :
+                FREE_ALL ;
+                mexErrMsgTxt ("unsupported class") ;
+            }
+        }
+    }
     }
 
     //--------------------------------------------------------------------------
@@ -505,6 +604,7 @@ void mexFunction
 
     ASSERT_OK (GB_check (C, "Final C before wait", 0)) ;
     GrB_wait ( ) ;
+    // GB_check (C, "C final", 1) ;
     pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C assign result", true) ;
     FREE_ALL ;
 }
diff --git a/GraphBLAS/Test/GB_mex_subassign_alias.c b/GraphBLAS/Test/GB_mex_subassign_alias.c
new file mode 100644
index 0000000000..25210e4670
--- /dev/null
+++ b/GraphBLAS/Test/GB_mex_subassign_alias.c
@@ -0,0 +1,78 @@
+//------------------------------------------------------------------------------
+// GB_mex_subassign_alias: C(:,:) = accum(C(:,:),C)
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+#define FREE_ALL                        \
+{                                       \
+    GB_MATRIX_FREE (&C) ;               \
+    GrB_free (&desc) ;                  \
+    GB_mx_put_global (malloc_debug) ;   \
+}
+
+void mexFunction
+(
+    int nargout,
+    mxArray *pargout [ ],
+    int nargin,
+    const mxArray *pargin [ ]
+)
+{
+
+    bool malloc_debug = GB_mx_get_global ( ) ;
+    GrB_Matrix C = NULL ;
+    GrB_Descriptor desc = NULL ;
+
+    // check inputs
+    if (nargout > 1 || nargin < 2 || nargin > 3)
+    {
+        mexErrMsgTxt ("Usage: C = GB_mex_subassign_alias (C, accum, desc)");
+    }
+
+    // get C (make a deep copy)
+    #define GET_DEEP_COPY \
+    C = GB_mx_mxArray_to_Matrix (pargin [0], "C input", true) ;
+    #define FREE_DEEP_COPY GB_MATRIX_FREE (&C) ;
+    GET_DEEP_COPY ;
+    if (C == NULL)
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("C failed") ;
+    }
+    mxClassID cclass = GB_mx_Type_to_classID (C->type) ;
+
+    // get accum; default: NOP, default class is class(C)
+    GrB_BinaryOp accum ;
+    if (!GB_mx_mxArray_to_BinaryOp (&accum, pargin [1], "accum",
+        GB_NOP_opcode, cclass, C->type == Complex, C->type == Complex))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("accum failed") ;
+    }
+
+    // get desc
+    if (!GB_mx_mxArray_to_Descriptor (&desc, PARGIN (2), "desc"))
+    {
+        FREE_ALL ;
+        mexErrMsgTxt ("desc failed") ;
+    }
+
+    GrB_Index nrows, ncols ;
+    GrB_Matrix_nrows (&nrows, C) ;
+    GrB_Matrix_ncols (&ncols, C) ;
+
+    // C(:,:) = accum (C(:,:),C)
+    METHOD (GxB_subassign (C, C, accum, C, GrB_ALL, nrows, GrB_ALL, ncols, desc)) ;
+
+    // return C to MATLAB as a struct and free the GraphBLAS C
+    pargout [0] = GB_mx_Matrix_to_mxArray (&C, "C output", true) ;
+
+    FREE_ALL ;
+}
+
diff --git a/GraphBLAS/Test/GB_mx_get_global.c b/GraphBLAS/Test/GB_mx_get_global.c
index 17ef262cd6..eafbe23f7d 100644
--- a/GraphBLAS/Test/GB_mx_get_global.c
+++ b/GraphBLAS/Test/GB_mx_get_global.c
@@ -63,7 +63,7 @@ bool GB_mx_get_global       // true if doing malloc_debug
     // return malloc debug status
     //--------------------------------------------------------------------------
 
-    // the caller will set GB_thread_local.malloc_debug, not done here
+    // the caller will set GB_Global.malloc_debug, not done here
 
     return (malloc_debug) ;
 }
diff --git a/GraphBLAS/Test/GB_mx_isequal.c b/GraphBLAS/Test/GB_mx_isequal.c
new file mode 100644
index 0000000000..94a9477df9
--- /dev/null
+++ b/GraphBLAS/Test/GB_mx_isequal.c
@@ -0,0 +1,60 @@
+//------------------------------------------------------------------------------
+// GB_mx_isequal: check if two matrices are equal
+//------------------------------------------------------------------------------
+
+// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved.
+// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
+
+//------------------------------------------------------------------------------
+
+#include "GB_mex.h"
+
+bool GB_mx_isequal     // true if A and B are exactly the same
+(
+    GrB_Matrix A,
+    GrB_Matrix B
+)
+{
+
+    if (A == B) return (true) ;
+    if (A == NULL) return (false) ;
+    if (B == NULL) return (false) ;
+
+    if (A->magic != B->magic) return (false) ;
+    if (A->type  != B->type ) return (false) ;
+    if (A->nrows != B->nrows) return (false) ;
+    if (A->ncols != B->ncols) return (false) ;
+    if (NNZ (A)  != NNZ (B) ) return (false) ;
+
+    // these differences are OK:
+    // if (A->nzmax != B->nzmax) return (false) ;
+    // if (A->max_npending != B->max_npending) return (false) ;
+    // queue_next and queue_prev are expected to differ
+
+    if (A->p_shallow != B->p_shallow ) return (false) ;
+    if (A->i_shallow != B->i_shallow ) return (false) ;
+    if (A->x_shallow != B->x_shallow ) return (false) ;
+    if (A->npending  != B->npending  ) return (false) ;
+    if (A->sorted_pending   != B->sorted_pending  ) return (false) ;
+    if (A->operator_pending != B->operator_pending) return (false) ;
+    if (A->nzombies != B->nzombies ) return (false) ;
+    if (A->enqueued != B->enqueued ) return (false) ;
+
+    int64_t n = A->ncols ;
+    int64_t nnz = NNZ (A) ;
+    size_t s = sizeof (int64_t) ;
+    size_t a = A->type->size ;
+    if (!GB_mx_same ((char *) A->p, (char *) B->p, (n+1) * s)) return (false) ;
+    if (!GB_mx_same ((char *) A->i, (char *) B->i, nnz * s)) return (false) ;
+    if (!GB_mx_xsame (A->x, B->x, nnz, a, A->i)) return (false) ;
+
+    int64_t np = A->npending ;
+    if (!GB_mx_same ((char *) A->ipending, (char *) B->ipending, np * s))
+        return (false) ;
+    if (!GB_mx_same ((char *) A->jpending, (char *) B->jpending, np * s))
+        return (false) ;
+    if (!GB_mx_xsame (A->xpending, B->xpending, np, a, A->i)) return (false) ;
+
+    return (true) ;
+}
+
diff --git a/GraphBLAS/Test/GB_mx_mxArray_to_Matrix.c b/GraphBLAS/Test/GB_mx_mxArray_to_Matrix.c
index 8435423e71..e8b326aac0 100644
--- a/GraphBLAS/Test/GB_mx_mxArray_to_Matrix.c
+++ b/GraphBLAS/Test/GB_mx_mxArray_to_Matrix.c
@@ -127,14 +127,6 @@ GrB_Matrix GB_mx_mxArray_to_Matrix  // returns GraphBLAS version of A
         // get the GraphBLAS types
         atype_in  = GB_mx_classID_to_Type (aclass_in) ;
         atype_out = GB_mx_classID_to_Type (aclass_out) ;
-        ASSERT_OK (GB_check (atype_in,  "A type in", 0)) ;
-        ASSERT_OK (GB_check (atype_out, "A type out", 0)) ;
-        if (atype_in == NULL || atype_out == NULL)
-        {
-            FREE_ALL ;
-            mexWarnMsgIdAndTxt ("GB:warn", "types must be numeric") ;
-            return (NULL) ;
-        }
     }
 
     // get the size and content of the MATLAB matrix
@@ -142,8 +134,34 @@ GrB_Matrix GB_mx_mxArray_to_Matrix  // returns GraphBLAS version of A
     int64_t ncols = mxGetN (Amatrix) ;
     int64_t *Mp = (int64_t *) mxGetJc (Amatrix) ;
     int64_t *Mi = (int64_t *) mxGetIr (Amatrix) ;
-    void *Mx = mxGetData (Amatrix) ;
     int64_t anz = Mp [ncols] ;
+    void *Mx = mxGetData (Amatrix) ;
+
+    // look for A.values
+    if (mxIsStruct (A_matlab))
+    {
+        int fieldnumber = mxGetFieldNumber (A_matlab, "values") ;
+        if (fieldnumber >= 0)
+        {
+            mxArray *values = mxGetFieldByNumber (A_matlab, 0, fieldnumber) ;
+            if (mxGetNumberOfElements (values) >= anz)
+            {
+                Mx = mxGetData (values) ;
+                aclass_in = mxGetClassID (values) ;
+                atype_in = GB_mx_classID_to_Type (aclass_in) ;
+            }
+        }
+    }
+
+    ASSERT_OK (GB_check (atype_in,  "A type in",
0)) ; + ASSERT_OK (GB_check (atype_out, "A type out", 0)) ; + if (atype_in == NULL || atype_out == NULL) + { + FREE_ALL ; + mexWarnMsgIdAndTxt ("GB:warn", "types must be numeric") ; + return (NULL) ; + } + GrB_Info info ; // get the pattern of A diff --git a/GraphBLAS/Test/GB_mx_mxArray_to_indices.c b/GraphBLAS/Test/GB_mx_mxArray_to_indices.c index 4164ac3718..281108ebec 100644 --- a/GraphBLAS/Test/GB_mx_mxArray_to_indices.c +++ b/GraphBLAS/Test/GB_mx_mxArray_to_indices.c @@ -22,7 +22,7 @@ bool GB_mx_mxArray_to_indices // true if successful, false otherwise (*handle) = NULL ; GrB_Index *I ; - if (mxIsEmpty (I_matlab)) + if (I_matlab == NULL || mxIsEmpty (I_matlab)) { I = (GrB_Index *) GrB_ALL ; // like the ":" in C=A(:,j) (*ni) = 0 ; diff --git a/GraphBLAS/Test/GB_mx_put_global.c b/GraphBLAS/Test/GB_mx_put_global.c index d74c7e1003..3117139c94 100644 --- a/GraphBLAS/Test/GB_mx_put_global.c +++ b/GraphBLAS/Test/GB_mx_put_global.c @@ -35,9 +35,12 @@ void GB_mx_put_global GrB_finalize ( ) ; - if (GB_thread_local.nmalloc != 0) + GxB_Statistics stats ; + GxB_stats (&stats) ; + if (stats.nmalloc != 0) { - printf ("GraphBLAS nmalloc "GBd"!\n", GB_thread_local.nmalloc) ; + printf ("GraphBLAS nmalloc "GBd"! inuse "GBd" maxused "GBd"\n", + stats.nmalloc, stats.inuse, stats.maxused) ; mexErrMsgTxt ("memory leak!") ; } } diff --git a/GraphBLAS/Test/GB_mx_same.c b/GraphBLAS/Test/GB_mx_same.c new file mode 100644 index 0000000000..2ae631bc69 --- /dev/null +++ b/GraphBLAS/Test/GB_mx_same.c @@ -0,0 +1,28 @@ +//------------------------------------------------------------------------------ +// GB_mx_same: check if two arrays are equal +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +//------------------------------------------------------------------------------ + +#include "GB_mex.h" + +bool GB_mx_same // true if arrays X and Y are the same +( + char *X, + char *Y, + int64_t len // length of X and Y +) +{ + if (X == Y) return (true) ; + if (X == NULL) return (false) ; + if (Y == NULL) return (false) ; + for (int64_t i = 0 ; i < len ; i++) + { + if (X [i] != Y [i]) return (false) ; + } + return (true) ; +} + diff --git a/GraphBLAS/Test/GB_mx_xsame.c b/GraphBLAS/Test/GB_mx_xsame.c new file mode 100644 index 0000000000..df89bc0092 --- /dev/null +++ b/GraphBLAS/Test/GB_mx_xsame.c @@ -0,0 +1,32 @@ +//------------------------------------------------------------------------------ +// GB_mx_xsame: check if two arrays are equal (ignoring zombies) +//------------------------------------------------------------------------------ + +// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +// http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
+ +//------------------------------------------------------------------------------ + +#include "GB_mex.h" + +bool GB_mx_xsame // true if arrays X and Y are the same (ignoring zombies) +( + char *X, + char *Y, + int64_t len, // length of X and Y + size_t s, // size of each entry of X and Y + int64_t *I // row indices (for zombies), same length as X and Y +) +{ + if (X == Y) return (true) ; + if (X == NULL) return (false) ; + if (Y == NULL) return (false) ; + if (I == NULL) return (false) ; + for (int64_t i = 0 ; i < len ; i++) + { + // check X [i] and Y [i], but ignore zombies + if (I [i] >= 0 && !GB_mx_same (X+i*s, Y+i*s, s)) return (false) ; + } + return (true) ; +} + diff --git a/GraphBLAS/Test/GB_spec_identity.m b/GraphBLAS/Test/GB_spec_identity.m index a062aa3718..c721a2f3b8 100644 --- a/GraphBLAS/Test/GB_spec_identity.m +++ b/GraphBLAS/Test/GB_spec_identity.m @@ -1,5 +1,5 @@ function identity = GB_spec_identity (arg1,arg2) -%GB_SPEC_IDENTITY the additive identity of a semiring +%GB_SPEC_IDENTITY the additive identity of a monoid % % identity = GB_spec_identity (add) ; % or diff --git a/GraphBLAS/Test/GB_spec_select.m b/GraphBLAS/Test/GB_spec_select.m index e8fdecdd0a..1d2b5b0e25 100644 --- a/GraphBLAS/Test/GB_spec_select.m +++ b/GraphBLAS/Test/GB_spec_select.m @@ -1,5 +1,5 @@ function C = GB_spec_select (C, Mask, accum, opname, A, k, descriptor) -%GB_SPEC_APPLY a MATLAB mimic of GrB_select +%GB_SPEC_SELECT a MATLAB mimic of GxB_select % % Usage: % C = GB_spec_select (C, Mask, accum, opname, A, k, descriptor) diff --git a/GraphBLAS/Test/Makefile b/GraphBLAS/Test/Makefile index 648cab796f..ca393fec81 100644 --- a/GraphBLAS/Test/Makefile +++ b/GraphBLAS/Test/Makefile @@ -1,10 +1,15 @@ #------------------------------------------------------------------------------- -# GraphBLAS/MATLAB/Makefile +# GraphBLAS/Test/Makefile +#------------------------------------------------------------------------------- + +# SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +# http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + #------------------------------------------------------------------------------- # This Makefile is only intended to clean up the compiled files and files # created by the gbmake.m function in MATLAB. To compile GraphBLAS for use in -# MATLAB type 'gbmake' in MATLAB. +# MATLAB type 'gbmake' in MATLAB. To run the tests in Test/ type 'testall'. clean: $(RM) *.o *.obj diff --git a/GraphBLAS/Test/gbmake.m b/GraphBLAS/Test/gbmake.m index 910dd1c46c..06ab8f655c 100644 --- a/GraphBLAS/Test/gbmake.m +++ b/GraphBLAS/Test/gbmake.m @@ -67,7 +67,8 @@ function gbmake (what, flags, mexfunctions, cfiles, hfiles, inc) end if (nargin < 4) - cfiles = [ dir('../Source/*.c') ; dir('GB_mx_*.c') ; dir('../Demo/Source/*.c') ] ; + cfiles = [ dir('../Source/*.c') ; dir('../Source/Generated/*.c') ; ... + dir('GB_mx_*.c') ; dir('../Demo/Source/*.c') ] ; end if (nargin < 5) @@ -76,13 +77,14 @@ function gbmake (what, flags, mexfunctions, cfiles, hfiles, inc) dir('Template/*.c') ; ... dir('Template/*.h') ; ... dir('../Source/*.h') ; ... + dir('../Source/Generated/*.h') ; ... dir('../Demo/Include*.h') ; ... dir('../Source/Template/*.h') ; ... 
dir('../Source/Template/*.c') ] ; end if (nargin < 6) - inc = '-ITemplate -I../Include -I../Source -I../Source/Template -I../Demo/Include' ; + inc = '-ITemplate -I../Include -I../Source -I../Source/Generated -I../Source/Template -I../Demo/Include' ; end %------------------------------------------------------------------------------- @@ -148,7 +150,8 @@ function gbmake (what, flags, mexfunctions, cfiles, hfiles, inc) % compile the cfile if it is newer than its object file, or any hfile if (tc > tobj || htime > tobj) % compile the cfile - fprintf ('.', cfile) ; + % fprintf ('.', cfile) ; + fprintf ('%s\n', cfile) ; mexcmd = sprintf ('mex -c %s -silent %s %s', flags, inc, cfile) ; eval (mexcmd) ; any_c_compiled = true ; @@ -178,7 +181,8 @@ function gbmake (what, flags, mexfunctions, cfiles, hfiles, inc) % compile the mexFunction mexcmd = sprintf ('mex %s -silent %s %s %s', ... flags, inc, mexfunction, objlist) ; - fprintf ('.') ; + % fprintf (':') ; + fprintf ('%s\n', mexfunction) ; eval (mexcmd) ; end end diff --git a/GraphBLAS/Test/gg.m b/GraphBLAS/Test/gg.m deleted file mode 100644 index 4f81980393..0000000000 --- a/GraphBLAS/Test/gg.m +++ /dev/null @@ -1,10 +0,0 @@ - -gbmake -clear -A = sparse (rand (4)) -hi = 1 ; -lo = -2 ; -C = GB_mex_band (A,lo,hi) -C2 = triu (tril (A,hi), lo) ; -assert (isequal (C,C2)) -full (C) diff --git a/GraphBLAS/Test/test06.m b/GraphBLAS/Test/test06.m index 7f86d38e5d..f9220370fa 100644 --- a/GraphBLAS/Test/test06.m +++ b/GraphBLAS/Test/test06.m @@ -1,4 +1,4 @@ -function test06 (A) +function test06 (A,B) %TEST06 test GrB_mxm on all semirings % % Usage: test06(A) @@ -98,14 +98,11 @@ function test06 (A) for k1 = 1:length(mult_ops) mulop = mult_ops {k1} ; if (n <= 500) - fprintf ('\n%6s', mulop) ; + fprintf ('\n%s', mulop) ; end for k2 = 1:length(add_ops) addop = add_ops {k2} ; - if (n <= 500) - fprintf (' %s', addop) ; - end for k3 = 1:length (classes) clas = classes {k3} ; diff --git a/GraphBLAS/Test/test28.m b/GraphBLAS/Test/test28.m new file mode 100644 index 0000000000..b8bfbcc77e --- /dev/null +++ b/GraphBLAS/Test/test28.m @@ -0,0 +1,27 @@ +function test28 +%TEST28 test mxm with aliased inputs, C = accum(C,C*C) + +% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +% http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
+ +rng ('default') ; + +semiring.multiply = 'times' ; +semiring.add = 'plus' ; +semiring.class = 'double' ; + +seed = 1 ; +for n = [1 5 10 100] + + for trial = 1:30 + + C = GB_mex_random (n, n, 10*n, 0, seed) ; seed = seed + 1 ; + + C1 = GB_mex_mxm_alias (C, 'plus', semiring, [ ]) ; + C2 = GB_mex_mxm (C, C, 'plus', semiring, C, C, [ ]) ; + assert (isequal (C1, C2)) ; + end +end + +fprintf ('test28: mxm alias tests passed\n') ; + diff --git a/GraphBLAS/Test/test29.m b/GraphBLAS/Test/test29.m new file mode 100644 index 0000000000..7ae8914c7c --- /dev/null +++ b/GraphBLAS/Test/test29.m @@ -0,0 +1,47 @@ +function test29 +%TEST29 GrB_reduce with zombies + +fprintf ('\n------------------------- GrB_reduce with zombies\n') ; + +[accum_ops unary_ops add_ops classes] = GB_spec_opsall ; + +for m = [1 5 10] + for n = [1 5 10] + + rng ('default') ; + + for k3 = 1:length (classes) + aclas = classes {k3} ; + + clear C + C.matrix = 100 * sparse (rand (m,n)) ; + C.class = aclas ; + C.pattern = logical (spones (C.matrix)) ; + + A = GB_spec_random (m,n,0.1,100,aclas) ; + + if (isequal (aclas, 'logical')) + ops = {'or', 'and', 'xor', 'eq'} ; + else + ops = {'min', 'max', 'plus', 'times'} ; + end + + for kk4 = 1:length(ops) + fprintf ('.') ; + [C3,c1,c3] = GB_mex_subassign (C, [ ], [ ], A, ... + [ ], [ ], [ ], ops{kk4}) ; + cin = GB_spec_identity (ops {kk4}, aclas) ; + c2 = GB_mex_reduce_to_scalar (cin, '', ops{kk4}, C3) ; + assert (isequal (c1,c2)) ; + + op.opname = 'plus' ; + op.opclass = 'double' ; + c4 = GB_mex_reduce_to_scalar (0, '', op, C3) ; + assert (isequal (c3,c4)) ; + end + end + end +end + +fprintf ('\ntest29: all tests passed\n') ; + diff --git a/GraphBLAS/Test/test52.m b/GraphBLAS/Test/test52.m index 80dd269f2c..dcdb93db52 100644 --- a/GraphBLAS/Test/test52.m +++ b/GraphBLAS/Test/test52.m @@ -47,10 +47,12 @@ k = 10e6 ; fprintf ('\nbuilding random sparse matrices %d by M\n', k) ; -for m = 1:20 +for m = [1:8 10:2:20 50 100 500 1000 3000] A = sprandn (k, m, 0.1) ; B = sprandn (k, m, 0.1) ; Mask = spones (sprandn (m, m, 0.5)) ; + A (:,m) = sparse (rand (k,1)) ; + B (:,m) = sparse (rand (k,1)) ; % fprintf ('MATLAB:\n') ; tic @@ -77,7 +79,7 @@ % fprintf ('MATLAB:\n') ; tic - C = spones (Mask) .* (A'*B) ; + C = Mask .* (A'*B) ; t1 = toc ; % fprintf ('GrB AdotB:\n') ; @@ -87,7 +89,7 @@ % fprintf ('GrB A''*B native:\n') ; tic - C4 = spones (Mask) .* GB_mex_AxB (A,B, true) ; + C4 = Mask .* GB_mex_AxB (A,B, true) ; t4 = toc ; fprintf (... 
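
For orientation, the masked expression Mask .* (A'*B) tested above is the MATLAB analogue of a masked GrB_mxm with the first input transposed. A rough equivalent with the standard GraphBLAS C API is sketched below (illustrative only, using the built-in PLUS_TIMES_FP64 semiring; it is not part of this patch):

    #include "GraphBLAS.h"

    // sketch: C<M> = A'*B with the plus-times semiring over double
    GrB_Info masked_AtB (GrB_Matrix C, GrB_Matrix M, GrB_Matrix A, GrB_Matrix B)
    {
        GrB_Descriptor d = NULL ;
        GrB_Descriptor_new (&d) ;
        GrB_Descriptor_set (d, GrB_INP0, GrB_TRAN) ;   // use A' as the first input
        GrB_Info info = GrB_mxm (C, M, NULL, GxB_PLUS_TIMES_FP64, A, B, d) ;
        GrB_free (&d) ;
        return (info) ;
    }

GB_mex_AdotB and GB_mex_AxB drive this same computation from MATLAB in the timing loops of this test.
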
@@ -160,5 +162,82 @@ assert (isequal (C, C3)) ; assert (isequal (C, C4)) ; +fprintf ('\nA''*x where A is big and x is a dense vector\n') ; +Prob = ssget (2662) ; +A = Prob.A ; +n = size (A, 1) ; +x = sparse (rand (n,1)) ; +z = full (x) ; + +fprintf ('MATLAB: x full:\n') ; +tic +y0 = A'*z ; +toc + +fprintf ('MATLAB: x sparse:\n') ; +tic +y1 = A'*x ; +toc + +fprintf ('GrB AdotB:\n') ; +tic +y2 = GB_mex_AdotB (A,x) ; +toc + +fprintf ('GrB A''xB auto select:\n') ; +tic +y3 = GB_mex_AxB (A,x, true) ; +toc + +assert (isequal (y1, sparse (y0))) ; +assert (isequal (y1, y2)) ; +assert (isequal (y1, y3)) ; + +fprintf ('\nx''A where A is big and x is a dense vector\n') ; + +fprintf ('MATLAB: x full:\n') ; +tic +y0 = z'*A ; +toc + +fprintf ('MATLAB: x sparse:\n') ; +tic +y1 = x'*A ; +toc + +fprintf ('GrB AdotB:\n') ; +tic +y2 = GB_mex_AdotB (x,A) ; +toc + +fprintf ('GrB A''xB auto select:\n') ; +tic +y3 = GB_mex_AxB (x, A, true) ; +toc + +assert (isequal (y1, sparse (y0))) ; +assert (isequal (y1, y2)) ; +assert (isequal (y1, y3)) ; + +fprintf ('\nA*x where A is big and x is a dense vector\n') ; + +fprintf ('MATLAB: x full:\n') ; +tic +y0 = A*z ; +toc + +fprintf ('MATLAB: x sparse:\n') ; +tic +y1 = A*x ; +toc + +fprintf ('GrB AxB:\n') ; +tic +y3 = GB_mex_AxB (A, x, false) ; +toc + +assert (isequal (y1, sparse (y0))) ; +assert (isequal (y1, y3)) ; + fprintf ('\ntest52: all tests passed\n') ; diff --git a/GraphBLAS/Test/test69.m b/GraphBLAS/Test/test69.m new file mode 100644 index 0000000000..7c49d8d414 --- /dev/null +++ b/GraphBLAS/Test/test69.m @@ -0,0 +1,39 @@ +function test69 +%TEST69 test GrB_assign with aliased inputs, C(:,:) = accum(C(:,:),C) + +% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +% http://suitesparse.com See GraphBLAS/Doc/License.txt for license. + +rng ('default') ; + +semiring.multiply = 'times' ; +semiring.add = 'plus' ; +semiring.class = 'double' ; + +seed = 1 ; +for m = [1 5 10 100] + for n = [1 5 10 100] + for trial = 1:30 + A = GB_mex_random (m, n, 10*n, 0, seed) ; seed = seed + 1 ; + C = GB_mex_random (m, n, 10*n, 0, seed) ; seed = seed + 1 ; + + C1 = GB_mex_assign_alias (C, 'plus', [ ], [ ], [ ]) ; + C2 = GB_mex_assign (C, [ ], 'plus', C, [ ], [ ], [ ], 0) ; + assert (isequal (C1, C2)) ; + + I = uint64 (randperm (m) - 1) ; + J = uint64 (randperm (n) - 1) ; + + C1 = GB_mex_assign_alias (C, 'plus', I, J, [ ]) ; + C2 = GB_mex_assign (C, [ ], 'plus', C, I, J, [ ], 0) ; + assert (isequal (C1, C2)) ; + + C1 = GB_mex_subassign_alias (C, 'plus', [ ]) ; + C2 = GB_mex_subassign (C, C, 'plus', C, [ ], [ ], [ ]) ; + assert (isequal (C1, C2)) ; + end + end +end + +fprintf ('test69: assign alias tests passed\n') ; + diff --git a/GraphBLAS/Test/test75.m b/GraphBLAS/Test/test75.m new file mode 100644 index 0000000000..42c9ca8882 --- /dev/null +++ b/GraphBLAS/Test/test75.m @@ -0,0 +1,177 @@ +function test75 +%TEST75 test GrB_mxm and GrB_vxm on all semirings (A'B dot product) + +% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. +% http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
+ +rng ('default') ; + +m = 200 ; +n = 5 ; +A_sparse = sprandn (m, n, 0.1) ; +A_sparse (:,3) = 0 ; +A_sparse (2,3) = 1.7 ; +A_sparse (18,3) = 2.2 ; +A_sparse (:,1:2) = sparse (rand (m,2)) ; +A_sparse (1,1) = 0; +A_sparse (18,1) = 0; +A_sparse (:,5) = 0 ; +A_sparse (1,5) = 11 ; +A_sparse (2,5) = 23 ; +A_sparse (18,5) = 33 ; + +B_sparse = sprandn (m, n, 0.1) ; +B_sparse (:,1) = 0 ; +B_sparse (1,1) = 3 ; +B_sparse (18,1) = 2 ; +B_sparse (:,[2 n]) = sparse (rand (m,2)) ; +B_sparse (3,2) = 0 ; +B_sparse (18,2) = 0 ; +A_sparse (:,3) = 0 ; +B_sparse (2,1) = 7 ; +B_sparse (18,1) = 8 ; +B_sparse (19,1) = 9 ; + +x_sparse = sparse (rand (m,1)) ; +x_sparse (99) = 0 ; + +y_sparse = sparse (zeros (m,1)) ; +y_sparse (99) = 1 ; + +A.matrix = A_sparse ; +A.class = 'see below' ; +A.pattern = logical (spones (A_sparse)) ; + +B.matrix = B_sparse ; +B.class = 'see below' ; +B.pattern = logical (spones (B_sparse)) ; + +X.matrix = x_sparse ; +X.class = 'see below' ; +X.pattern = logical (spones (x_sparse)) ; + +Y.matrix = y_sparse ; +Y.class = 'see below' ; +Y.pattern = logical (spones (y_sparse)) ; + +fprintf ('\n-------------- GrB_mxm, vxm (dot product) on all semirings\n') ; + +[mult_ops unary_ops add_ops classes semirings] = GB_spec_opsall ; + +Cin = sparse (n, n) ; +Xin = sparse (n, 1) ; + +Mask = sparse (ones (n,n)) ; +mask = sparse (ones (n,1)) ; + +dnn = struct ; +dtn = struct ( 'inp0', 'tran' ) ; +dnt = struct ( 'inp1', 'tran' ) ; +dtt = struct ( 'inp0', 'tran', 'inp1', 'tran' ) ; + +n_semirings = 0 ; + +% eq_eq_bool: 18, 8, 1 + +for k1 = 1:length(mult_ops) + mulop = mult_ops {k1} ; + fprintf ('\n%s', mulop) ; + + for k2 = 1:length(add_ops) + addop = add_ops {k2} ; + + for k3 = 1:length (classes) + clas = classes {k3} ; + + semiring.multiply = mulop ; + semiring.add = addop ; + semiring.class = clas ; + + % create the semiring. some are not valid because the or,and,xor,eq + % monoids can only be used when z is boolean for z=mult(x,y). + try + [mult_op add_op id] = GB_spec_semiring (semiring) ; + [mult_opname mult_opclass zclass] = GB_spec_operator (mult_op) ; + [ add_opname add_opclass] = GB_spec_operator (add_op) ; + identity = GB_spec_identity (semiring.add, add_opclass) ; + catch me + if (~isempty (strfind (me.message, 'gotcha'))) + semiring + pause + end + continue + end + + % there are 1344 semirings that pass this test: + % 17 ops: 8:(1st, 2nd, min, max, plus, minus, times, div) + % 6:(is*) + % 3:(or,and,xor) + % TxT->T + % each has 44 monoids: all 11 types: max,min,plus,times + % and 4 for boolean or,and,xor,eq + % 17*48 = 816 + % 6 ops: eq,ne,gt,lt,ge,le + % TxT->bool + % each has 11 types + % and 8 monoids (max,min,plus,times,or,and,xor,eq) + % 6*11*8 = 528 + % 816 + 528 = 1344 + % but only 960 are unique. + % see GrB_AxB_builtin for details. 
+ + A.class = clas ; + B.class = clas ; + X.class = clas ; + Y.class = clas ; + + n_semirings = n_semirings + 1 ; + fprintf ('.') ; + + % C = A'*B, with mask + tic + C1 = GB_mex_mxm (Cin, Mask, [ ], semiring, A, B, dtn); + t2 = toc ; + C2 = GB_spec_mxm (Cin, Mask, [ ], semiring, A, B, dtn); + GB_spec_compare (C1, C2, id) ; + + % X = u*A, with mask + tic + C1 = GB_mex_vxm (Xin, mask, [ ], semiring, X, A, [ ]); + t2 = toc ; + C2 = GB_spec_vxm (Xin, mask, [ ], semiring, X, A, [ ]); + GB_spec_compare (C1, C2, id) ; + + if (k3 == 1) + % repeat but with typecasing, to test generic A'*B + A.class = 'double' ; + + % C = A'*B, with mask + tic + C1 = GB_mex_mxm (Cin, Mask, [ ], semiring, A, B, dtn); + t2 = toc ; + C2 = GB_spec_mxm (Cin, Mask, [ ], semiring, A, B, dtn); + GB_spec_compare (C1, C2, id) ; + + % X = u*A, with mask + tic + C1 = GB_mex_vxm (Xin, mask, [ ], semiring, X, A, [ ]); + t2 = toc ; + C2 = GB_spec_vxm (Xin, mask, [ ], semiring, X, A, [ ]); + GB_spec_compare (C1, C2, id) ; + + % X = u*A, with mask + tic + C1 = GB_mex_vxm (Xin, mask, [ ], semiring, Y, A, [ ]); + t2 = toc ; + C2 = GB_spec_vxm (Xin, mask, [ ], semiring, Y, A, [ ]); + GB_spec_compare (C1, C2, id) ; + + end + end + end +end + +n_semirings + +fprintf ('\ntest75: all tests passed\n') ; + diff --git a/GraphBLAS/Test/testall.m b/GraphBLAS/Test/testall.m index 464fdc023a..b4e4f13f39 100644 --- a/GraphBLAS/Test/testall.m +++ b/GraphBLAS/Test/testall.m @@ -49,16 +49,19 @@ function testall (longtests) logstat ('test15') ; % simple test of GB_mex_AxB logstat ('test17') ; % quick test of GrB_*_extractElement logstat ('test72') ; % several special cases -logstat ('test20') ; % quick test of GB_mex_mxm on a few semirings -logstat ('test25') ; % quick test of GxB_select logstat ('test26') ; % quick test of GxB_select -logstat ('test27') ; % quick test of GxB_select (band) +logstat ('test29') ; % reduce with zombies +logstat ('test69') ; % assign and subassign with alias +logstat ('test28') ; % mxm with aliased inputs, C = accum(C,C*C) logstat ('test11') ; % exhaustive test of GrB_extractTuples logstat ('test14') ; % GrB_reduce +logstat ('test20') ; % quick test of GB_mex_mxm on a few semirings logstat ('test00') ; % GB_mex_mis logstat ('test19') ; % GxB_subassign, many pending operators logstat ('test12') ; % Wathen finite-element matrices (short test) logstat ('test10') ; % GrB_apply +logstat ('test27') ; % quick test of GxB_select (band) +logstat ('test25') ; % quick test of GxB_select logstat ('test74') ; % test GrB_mxm on all semirings, just dot product method logstat ('test99') ; % GB_mex_transpose with explicit zeros in the Mask logstat ('test23') ; % quick test of GB_*_build @@ -67,6 +70,7 @@ function testall (longtests) logstat ('test24') ; % test of GrB_Matrix_reduce logstat ('test21') ; % quick test of GB_mex_subassign logstat ('test06') ; % test GrB_mxm on all semirings +logstat ('test75') ; % test GrB_mxm A'*B on all semirings logstat ('test19b') ; % GrB_assign, many pending operators logstat ('test22') ; % quick test of GB_mex_transpose @@ -78,7 +82,7 @@ function testall (longtests) if (longtests) % useful tests but not needed for statement coverage logstat ('test26(1)') ; % longer test of GxB_select - logstat ('test20(1)') ; % exhaustive test of GB_mex_mxm on all built-in semirings + logstat ('test20(1)') ; % test of GB_mex_mxm on all built-in semirings logstat ('test18(1)') ; % lengthy tests of GrB_eWiseAdd and eWiseMult logstat ('test08b') ; % quick test GB_mex_assign logstat ('test09b') ; % duplicate I,J test of 
GB_mex_assign diff --git a/GraphBLAS/Test/testc7.m b/GraphBLAS/Test/testc7.m index 609e4b9da6..4c720677a4 100644 --- a/GraphBLAS/Test/testc7.m +++ b/GraphBLAS/Test/testc7.m @@ -6,6 +6,9 @@ rng ('default') +dclear.outp = 'replace' ; +dclear.mask = 'scmp' ; + seed = 1 ; for m = [1 5 10 50] for n = [1 5 10 50] @@ -16,7 +19,9 @@ I = randperm (m, ni) ; J = randperm (n, nj) ; seed = seed + 1 ; - A = GB_mex_random (ni, nj, 4*(ni+nj), 1, seed) ; + A = GB_mex_random (ni, nj, 2*(ni+nj), 1, seed) ; + seed = seed + 1 ; + M = GB_mex_random (ni, nj, 4*(ni+nj), 0, seed) ; C1 = C ; C1 (I,J) = A ; @@ -25,6 +30,11 @@ C2 = GB_mex_subassign (C, [ ], [ ], A, I0, J0, []) ; assert (isequal (C1, C2.matrix)) ; + [C3,c1] = GB_mex_subassign (C, M, [ ], A, I0, J0, [], 'plus') ; + cin = complex (0,0) ; + c2 = GB_mex_reduce_to_scalar (cin, '', 'plus', C3) ; + assert (isequal (c1,c2)) ; + C1 = C ; C1 (I,J) = C1 (I,J) + A ; @@ -33,6 +43,20 @@ end end + + C = GB_mex_random (m, n, 100*(m*n), 1, seed) ; seed = seed + 1 ; + M = GB_mex_random (m, n, 4*(ni+nj), 0, seed) ; seed = seed + 1 ; + A = GB_mex_random (m, n, m+n, 1, seed) ; seed = seed + 1 ; + [C3,c1] = GB_mex_subassign (C, M, [ ], A, [ ], [ ], dclear, 'plus') ; + cin = complex (0,0) ; + c2 = GB_mex_reduce_to_scalar (cin, '', 'plus', C3) ; + assert (isequal (c1,c2)) ; + + [C3,c1] = GB_mex_subassign (C, [ ], [ ], A, [ ], [ ], dclear, 'plus') ; + cin = complex (0,0) ; + c2 = GB_mex_reduce_to_scalar (cin, '', 'plus', C3) ; + assert (isequal (c1,c2)) ; + end end diff --git a/GraphBLAS/Test/testca.m b/GraphBLAS/Test/testca.m index 0d92418ddb..c1a3dac26a 100644 --- a/GraphBLAS/Test/testca.m +++ b/GraphBLAS/Test/testca.m @@ -4,6 +4,7 @@ % SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017, All Rights Reserved. % http://suitesparse.com See GraphBLAS/Doc/License.txt for license. 
+rng ('default') ; dnt = struct ('inp1', 'tran') ; dtn = struct ('inp0', 'tran') ; dtt = struct ('inp0', 'tran', 'inp1', 'tran') ; @@ -12,7 +13,7 @@ for m = [1 5 10 100] for n = [1 5 10 100] for k = [1 5 10 100] - for trial = 1:20 + for trial = 1:30 A = GB_mex_random (m, k, 10*(m+k), 1, seed) ; seed = seed + 1 ; B = GB_mex_random (k, n, 10*(k+n), 1, seed) ; seed = seed + 1 ; diff --git a/KLU/Doc/Makefile b/KLU/Doc/Makefile index 8d7e7492d6..a275b4be59 100644 --- a/KLU/Doc/Makefile +++ b/KLU/Doc/Makefile @@ -24,15 +24,15 @@ distclean: clean KLU_UserGuide.pdf: KLU_UserGuide.tex KLU_UserGuide.bib \ ../Include/klu.h ../../BTF/Include/btf.h Makefile - echo '\begin{verbatim}' > klu_h.tex + echo '\\begin{verbatim}' > klu_h.tex expand -8 ../Include/klu.h >> klu_h.tex - echo '\end{verbatim}' >> klu_h.tex - echo '\begin{verbatim}' > btf_h.tex + echo '\\end{verbatim}' >> klu_h.tex + echo '\\begin{verbatim}' > btf_h.tex expand -8 ../../BTF/Include/btf.h >> btf_h.tex - echo '\end{verbatim}' >> btf_h.tex - echo '\begin{verbatim}' > klu_simple_c.tex + echo '\\end{verbatim}' >> btf_h.tex + echo '\\begin{verbatim}' > klu_simple_c.tex expand -8 ../Demo/klu_simple.c >> klu_simple_c.tex - echo '\end{verbatim}' >> klu_simple_c.tex + echo '\\end{verbatim}' >> klu_simple_c.tex pdflatex KLU_UserGuide bibtex KLU_UserGuide pdflatex KLU_UserGuide diff --git a/Makefile b/Makefile index d66aa25a65..fc8713a162 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ include SuiteSparse_config/SuiteSparse_config.mk # Compile the default rules for each package go: metis ( cd SuiteSparse_config && $(MAKE) ) + ( cd GraphBLAS && $(MAKE) ) ( cd AMD && $(MAKE) ) ( cd BTF && $(MAKE) ) ( cd CAMD && $(MAKE) ) @@ -36,6 +37,7 @@ endif # (note that CSparse is not installed; CXSparse is installed instead) install: metisinstall ( cd SuiteSparse_config && $(MAKE) install ) + ( cd GraphBLAS && $(MAKE) install ) ( cd AMD && $(MAKE) install ) ( cd BTF && $(MAKE) install ) ( cd CAMD && $(MAKE) install ) @@ -80,6 +82,7 @@ uninstall: $(RM) $(INSTALL_DOC)/SuiteSparse_README.txt ( cd SuiteSparse_config && $(MAKE) uninstall ) - ( cd metis-5.1.0 && $(MAKE) uninstall ) + - ( cd GraphBLAS && $(MAKE) uninstall ) ( cd AMD && $(MAKE) uninstall ) ( cd CAMD && $(MAKE) uninstall ) ( cd COLAMD && $(MAKE) uninstall ) @@ -106,9 +109,11 @@ ifeq (,$(MY_METIS_LIB)) endif $(RM) -r $(INSTALL_DOC) -# compile the dynamic libraries +# compile the dynamic libraries. For GraphBLAS, this also builds the +# static library library: metis ( cd SuiteSparse_config && $(MAKE) ) + ( cd GraphBLAS && $(MAKE) library ) ( cd AMD && $(MAKE) library ) ( cd BTF && $(MAKE) library ) ( cd CAMD && $(MAKE) library ) @@ -129,7 +134,9 @@ endif # ( cd PIRO_BAND && $(MAKE) library ) # ( cd SKYLINE_SVD && $(MAKE) library ) -# compile the static libraries (except for metis, which is only dynamic) +# compile the static libraries (except for metis and GraphBLAS. metis is only +# dynamic, and the single 'make library' for GraphBLAS makes both the dynamic +# and static libraries. 
static: metis ( cd SuiteSparse_config && $(MAKE) static ) ( cd AMD && $(MAKE) static ) @@ -157,6 +164,7 @@ purge: - ( cd SuiteSparse_config && $(MAKE) purge ) - ( cd metis-5.1.0 && $(MAKE) distclean ) - ( cd AMD && $(MAKE) purge ) + - ( cd GraphBLAS && $(MAKE) purge ) - ( cd CAMD && $(MAKE) purge ) - ( cd COLAMD && $(MAKE) purge ) - ( cd BTF && $(MAKE) purge ) @@ -182,6 +190,7 @@ purge: clean: - ( cd SuiteSparse_config && $(MAKE) clean ) - ( cd metis-5.1.0 && $(MAKE) clean ) + - ( cd GraphBLAS && $(MAKE) clean ) - ( cd AMD && $(MAKE) clean ) - ( cd CAMD && $(MAKE) clean ) - ( cd COLAMD && $(MAKE) clean ) @@ -202,6 +211,7 @@ clean: # Create the PDF documentation docs: + ( cd GraphBLAS && $(MAKE) docs ) ( cd AMD && $(MAKE) docs ) ( cd CAMD && $(MAKE) docs ) ( cd KLU && $(MAKE) docs ) diff --git a/README.txt b/README.txt index 539d2bad22..a435366803 100644 --- a/README.txt +++ b/README.txt @@ -1,8 +1,8 @@ SuiteSparse: A Suite of Sparse matrix packages at http://www.suitesparse.com -Nov 25, 2017. SuiteSparse VERSION 5.0.0 +Dec 28, 2017. SuiteSparse VERSION 5.1.2 -Now includes GraphBLAS 1.0.0 and a new interface to the SuiteSparse Matrix +Now includes GraphBLAS and a new interface to the SuiteSparse Matrix Collection (ssget), via MATLAB and a Java GUI, to http://sparse.tamu.edu. ------------------ @@ -334,6 +334,8 @@ Step-by-step details: and the compiled libraries are copied into SuiteSparse/lib. Documentation is copied into SuiteSparse/doc. + The GraphBLAS libraries are created by cmake and placed in GraphBLAS/build. + NOTE: on Linux, you may see some errors when you compile METIS ('make: *** No rule to make target 'w'.). You can safely ignore those errors. diff --git a/SuiteSparse_config/Makefile b/SuiteSparse_config/Makefile index ba3040681a..049768c731 100644 --- a/SuiteSparse_config/Makefile +++ b/SuiteSparse_config/Makefile @@ -7,7 +7,7 @@ export SUITESPARSE # version of SuiteSparse_config is also version of SuiteSparse meta-package LIBRARY = libsuitesparseconfig -VERSION = 5.1.0 +VERSION = 5.1.2 SO_VERSION = 5 default: library diff --git a/SuiteSparse_config/README.txt b/SuiteSparse_config/README.txt index 8129f5a04b..d03aa8f037 100644 --- a/SuiteSparse_config/README.txt +++ b/SuiteSparse_config/README.txt @@ -37,6 +37,7 @@ SuiteSparse_config is not required by these packages: CSparse a Concise Sparse matrix package MATLAB_Tools toolboxes for use in MATLAB + GraphBLAS graph algorithms in the language of linear algebra In addition, the xerbla/ directory contains Fortan and C versions of the BLAS/LAPACK xerbla routine, which is called when an invalid input is passed to diff --git a/SuiteSparse_config/SuiteSparse_config.h b/SuiteSparse_config/SuiteSparse_config.h index 3bd9593c80..f672f5e445 100644 --- a/SuiteSparse_config/SuiteSparse_config.h +++ b/SuiteSparse_config/SuiteSparse_config.h @@ -184,8 +184,8 @@ int SuiteSparse_divcomplex * * SuiteSparse contains the following packages: * - * SuiteSparse_config version 5.1.0 (version always the same as SuiteSparse) - * GraphBLAS version 1.1.0 + * SuiteSparse_config version 5.1.2 (version always the same as SuiteSparse) + * GraphBLAS version 1.1.2 * ssget version 2.0.0 * AMD version 2.4.6 * BTF version 1.2.6 @@ -235,11 +235,11 @@ int SuiteSparse_version /* returns SUITESPARSE_VERSION */ */ #define SUITESPARSE_HAS_VERSION_FUNCTION -#define SUITESPARSE_DATE "Dec 1, 2017" +#define SUITESPARSE_DATE "Dec 28, 2017" #define SUITESPARSE_VER_CODE(main,sub) ((main) * 1000 + (sub)) #define SUITESPARSE_MAIN_VERSION 5 #define SUITESPARSE_SUB_VERSION 1 
-#define SUITESPARSE_SUBSUB_VERSION 0
+#define SUITESPARSE_SUBSUB_VERSION 2
 
 #define SUITESPARSE_VERSION \
     SUITESPARSE_VER_CODE(SUITESPARSE_MAIN_VERSION,SUITESPARSE_SUB_VERSION)
 
diff --git a/SuiteSparse_config/SuiteSparse_config.mk b/SuiteSparse_config/SuiteSparse_config.mk
index bb26ac3a38..9f456afc02 100644
--- a/SuiteSparse_config/SuiteSparse_config.mk
+++ b/SuiteSparse_config/SuiteSparse_config.mk
@@ -3,9 +3,11 @@
 #===============================================================================
 
 # This file contains all configuration settings for all packages in SuiteSparse,
-# except for CSparse (which is stand-alone) and the packages in MATLAB_Tools.
+# except for CSparse (which is stand-alone), the packages in MATLAB_Tools,
+# and GraphBLAS.  The configuration settings for GraphBLAS are determined by
+# GraphBLAS/CMakeLists.txt
 
-SUITESPARSE_VERSION = 5.1.0
+SUITESPARSE_VERSION = 5.1.2
 
 #===============================================================================
 # Options you can change without editing this file:
diff --git a/metis-5.1.0/GKlib/Makefile b/metis-5.1.0/GKlib/Makefile
index d17b4f44c9..30627cee4a 100644
--- a/metis-5.1.0/GKlib/Makefile
+++ b/metis-5.1.0/GKlib/Makefile
@@ -18,7 +18,7 @@ systype = $(shell uname -s)
 BUILDDIR = build/$(systype)-$(cputype)
 
 # Process configuration options.
-CONFIG_FLAGS = -DCMAKE_VERBOSE_MAKEFILE=1
+CONFIG_FLAGS =
 ifneq ($(gdb), not-set)
     CONFIG_FLAGS += -DGDB=$(gdb)
 endif
diff --git a/metis-5.1.0/Makefile b/metis-5.1.0/Makefile
index e4ea0100c6..c6ce96d988 100644
--- a/metis-5.1.0/Makefile
+++ b/metis-5.1.0/Makefile
@@ -19,7 +19,7 @@ systype = $(shell uname -s)
 BUILDDIR = build/$(systype)-$(cputype)
 
 # Process configuration options.
-CONFIG_FLAGS = -DCMAKE_VERBOSE_MAKEFILE=1
+CONFIG_FLAGS =
 ifeq ($(gklib_path), not-set)
     gklib_path = GKlib
 endif
diff --git a/ssget/mat/HB/bcsstk20.mat b/ssget/mat/HB/bcsstk20.mat
deleted file mode 100644
index ee3ea0de3651b50ddcb30927db444ebf04fc8085..0000000000000000000000000000000000000000
zhz3zjUbmI|NY2e!vVy0>N53Sj3K2l-da$%OF$9mts@OyXR}Ia~F`1Q#q2n+k#da6# zY8QpAA(Vamb<^z>Ql@H%QaX-6&koC2+M8qsjq;M}Idhnh-`HFa!( z58j!?!50;0E#Kdgsx>kREQOF=n3RBJO&9fk~+D?(TTo2BZK{{j6ce zL$Z|{zqKWV+*;pWH3)1C@9lo}Fx#6QMMU6PR2=j7oz2C#!L{VsVGL!`485=xeBQrFzmx?0vw^L8Ia5gixAOs$QNeR|^esoFLI<)Je~HfechC_Gmc z`OdW%w7DqrOETbCxejO3gsTy}J9=jeruLle+8p1nXVHV2i}kn%8@Q#6#Rh+&)d?qL zt^;P0z^!dPXd-rc=js=hSARY+#C>B29Zf`;JI({nDOu;{Jb@KaT^h$X2t7b;a`^Xg z&>tf5Q-->d{bl40b0dK?sNffB_@sF6!$;C8!Di#UrADAGx>dFw(wg z$xtlGFD}?)$+1lwtnI@sIY(ft(AO+;I8^7~dc%VB@Y4p!`F9MO?g{#Rtj@4og9kDweGl+_GZnfe-+;z(|_Au{Her?)z*~K?Ga}K6c>g(z!D8KU_i?A_bMy zT*6;`Dmds^)!2|Kx!fvRqvt$lDggI%e$==T}ytWK3~(uqqR-Z-e&!rH!DGlccV^e7GX9xMAMS=NC4aCtv_9I-tS-==zE) zV*ifB-2@)RT{*t{6L>thlxrcckE8^*u?uDEaHt=z1pk3KOwUA@ud*hT}(Vx|AW2==h)HDyID_DoEC{-`5}1x zN3&>;u>BPMk{l$qqXZ}tK5w8M-u~NCEyI8abfJfyZrKF5Ze>5&HHZ=_+wB8#R775@ zH(ii=c#QAlojHB72GvfyPfo_<+znIaq4hhYTegSh2MVfY3>%@lVSvfI z6S}ev7BhIc<`B}ifAXcgdAlKvaA(;JCee`^%O4^|CvOxYaX__$gTy8Vt?m&f)A7BM zwC~k!WM3e#kpNbLM=Rep>wx+7Cs}^pcOQSbB)QO$=u4C386i(&;^|1O?Tx)6p^c^= z>Tw*-3^WXSAwlNn0{Jx#Guv->mv+}{oLLz4YQj|{vE}@|*4s~qgc9$WMPmgrS(fQH z%eU{=e2=Y%9mY^>es5tcoZ=OBNuIrG)V{iFQO5wYOfXR~N}M!wP*63%Tu3*6L8Ox5 zZC!MRreCUp)sU|Q44lT{R{~bGc4HJM!ZlNdgfl%CQBz0bBe5jbpSXjhvifTLq2!L< znt&@yOB9UP5T3}aEi$pyCfE=MBJmmT-1Kd#ilzM}tJ99izo$G0?<9}#qm;bBk+)<@ z`^SOn*M2)PcsSs`CX9o4NTX%&aQuWpS0%87&{35RT9vu^LaQHu=)eKL|0$HrT>k~x zrZIql`hXCVrd5K0asxC68;Qo5W)#P{wCx%F-a^D_Jv<4ONGc=vz2@z}f};nlQTcQ~ zP*ZRtO|*k}nN_dsl+#wr(gL}g278|kgqbV?p;(i3F7p!qj&B{;%(9650x2;ot%F0+ zX9!krXz#TML^N!;loymBz{x~;QMrj}%$6*O%Yhn2C*?`ql?9k|i~Ya7So#Wcix%w<^Z`qx>@%Wyk10yY8<%tkLnHDb z^Rv|t4)Z?M9y;2V7GlNDSmFhibo&03PXWV4(qi&lYYXZzG%C8dxrSVKe`e5larT5k zzMrobzuj>UFPLT3v#D&t7BR&;7wO>5+1%Hj!U_-n?UhNGG(BE+gS{p7CYNAju&9pB$UD?S2S_f(`E_72lj#p_p(2;sM=$`df!D@u! zg%D-KRleOd^J3>gQ}Jlesmlyff+#cGqwh9YmN%cLS>B#sQ{1=G+o8J%xOrSLU0AXdS-eT!UPJ7 z+;5e|dEH*1MtWCA$3CtX~mU$Z%PR9AJ9(2TyiM3Kf8HS|Utq9~)q}fpT;J7X$SM0#UH}6X5SYnEaT6Q(YkH9C08HAHL&sV^B}H==t9l4$P94 zFbswU%EZE)#ta5Z1JH_F=RoqCskte_&}`@&ED|C}6jQdH#SmLo;6t+Pa#An1!%p0w z_aR~#;lkM{4GL; z3Y#L+U!q@mh0$Za{2oQV%+oO>q3-_t<*MQK+?3UB!?!E#ab$&)OOKb)4q|InrysCxY?>OTe7hQd6@w(0fY--r(Mp3GQU~HrW81Cok z^I{F^Oz{U^05Iub_lg?XQX8lfoW1MNbbGfBjHyFhtcRgh8*Z(KmtE&l{}}X{soQ<% z&h~TGIH7Ll2I`=KG&`?<_*=V~-jeU*+ia;kYKwQP#h(J9AV@e9wo?mn(a1LrRTHWK z!j$GhY3s8sGe;~CX%{{Ewn-ihlZh&$r;8)ddV_380Jcn$tvq}NXj#mp=86Lqumh}( zLRP2$DvRs1Ub}4~T{RZpWooFA9T)u2gi%lwKVvsWIRlk`X&p z%YQ8lB7n^#7CMo%@z7GohX+8PC{nz(8Yw*TlEW4SAEJtL%jMxf2AuFHu274~Q#J7o zEuh8W0R$zV&}vlbsfj1(#JFu5+^d5s!CXE4~KoZ@OMYv#Cjvv$KM7Gk6z@q|kpZKJg?kc#)^R z2;yS#j=d7Sz=;E{634i4Davm0$@OvDoo`LDGw$@!8r2?Z$p}$yaKCx}Sid9K{OxaxBHsQt>ymT~*UD1Zf z0IkqZ4}USrf*`SMZ%LODS%Cb2nEaAg_42myW*a-o8IuwBA0F-DxSJvHW z*O-0#-Hya$$$jZ^X2wX2)>6b|=#>3@W`ec^1(fVuX;$++qi_C`LrZue;m3<=^#r&o zO%O$ojeh5AU&IT=feSTvZ7>nUtU&t%U=9BlZI*E&)NqH9EuP)=(JH$B_2c1r#JxDK z6s#9P^&buX)(5)X8=kO1`u&$#O|Afg*d|nU=$BPJa1=pvzYRCL`3O~n)w7_i^n!7+ z$8T$(U(_@dxF4v|Ct3S9o!|oA;vfviFpj7a*HMRGa+W19vI9A~Mdq{W?0wU(Ey15M$>ekitO-k#^$_I}hAO&4}c$MIq%lB9zUrD0JME z!w8sHxbV>W!YF)+a=M-NOjJVSfD32Wf*F5)k#dOxX>CkYQLJC!oW!!*Hj=NH{srvE zT298)fJuNV13$6W2m*+xr=&rW8uVP`H!X|QS+9S6gk++BZm7(O+# z33JLn5LGb<2Fs(??))Ir66&X_mY8<2G)!$%fV$X=7s*4fP#b=;*gr{aKm)l;uv8p9 zBvX%zKkx!!A41XUW1PTTQh``Zfmue-Xhmc32^ibn+xO>3ENA!`Z#qNr#Y7(V69wGOFf;DW+Kh{+($JXEwuxv9xc=ytG8K4pgO4=supsT z&YXU=0(DKL2wFr{D=gR0{TesgD>do0=9~oXj8EL2Eb48fa3)FDn>l!kox`id`O<2E znMpbqtQ+aG_S~I^KlB3uo#>dlzXC>>G~A|jfx4K0{}`2J28>@7@T8&A#)~+j@AqooY(Dd)?5=>R($!PWUFMOyq8FuDCtOasxWA&%G 
zSd+uR)n>^xTf5;LKxSu#@Eb?cObl*E|D20DGZk&do=_9=u?_`)IN(#5eGXXu?hu&&7ws$CmrZK(n?8-#bF0R7w8lM6-BRGEm?DzIvH)$8F19|7a$j-@x88hZnG&J`#yxZfK6gZXVU@H~@L3G7J|bECnQ z-B690?C^K=Unu^3y&|zkBXzVf*S9WLndJzga!J*WAsG2mI+T5uhM5ds|IAZf9W|S0 z9vwIR8Vj>r-t>iC{gd`G@O7*}3MNiPKnot`V1_UXt@}>*R<$xs?`k$_bPLTPoG7zR zrzS1w4Ox&hU-1vcK+t5LY;@nS4K!;?xe7wK3m(gpK3A{IyU*kCM4ERmfLmc+>+8D4JvT^*8kP^!1=*9?lE{~bwF8%#nlNe% zBTv6KZ{E|@AiYo ze>)Gn5kZ-S*NSf*;(lCRxQ#Mo$FpipU?ZL+orYBg8Tv=y4qTXs z$~;AfH8{dJr(p+}I_>R?X%TS|(i|6X%j0nUHE^?Us`P&w6F4L6} z+>_(&3Dkl#K+L750a?WAVVNdoF=rtf6ru7kO<`k8yM8fvOvA$I>7V^b(&pKgvTfDF z(4jPN%gECT!_JzmUcBjROrs$&;tQ<$pP^y41C8>I2irm>_dHW~9=DC%et1&C9rg`h z8kNIkY5KJgic%59#!cl$E$d@THBk8+Q`uiEc_rX*M2fyF0KQ?QeL9R-gAticjTl*u z6J!K*vb{VN?Dd*d`$GS>!2-jlJ4>y@9n_e>P^5CF)JhmO(fE99*67xo7xHgmzcG>* zIv$v=e}73JFI`JPy#xlCh*N{{2N;IXm}z+2;v|{wZTq3{cCp}+K+KfLco_yviG>0~ zN6gX*<{6FB0orn+h5O8tq#iepp<`SwXw8s)$>Ujza&z+X47h=>rkn#Vdz6bw@I0vY zMd^F%rRk4#-+5>a;?L+*NAl9?52u+kUr&41KjFiRqVt5Ffm2hj4n(t0URHxi7CoXK ziV2^b&1>Tf#fmj4n;h_GyJj;RW9KV%z4dW$ZrQ=e8`C?@ ze90ZfmR@K_kZmCOG=G-uQuJo}-!VM8E@NKP6e>|Z;ied4mUO9(X9b}vm(i4o**Z5is(*7Sy=fIuW+H~t! z9ox2T+qR94?L4t<+qP{x9VZ=I9sA_{_W29&&Jxy^p{gm5<`(X^`2ua}#{c|piCzQ?iVDWW_A_|? zY|*LG$Z$co06z~C`RLUGfyL{fv7wR)D3&$1 z3IjIwAgZ#txjlfWt$}VgBwcp;9n>lwX;|sm5JI{!E?>IpZlM8438P`GYFf=kkCrYZ zUhs7WIV9C(w1~`*(Q23}R)4YK>0i(=yc-t{tyLrvBN8QzGylnr{=OAP&n*W9~?b8pi`O2Y-|wm1zLs%Ogr_$ zTegkX%EnmD0H%L|O!)&5nDc;C`8Rd{@RLM0(RsV-?u3r4r?tE=n!Kk#&V3|Nyhe@+ zwoZ&spm&Ept^DZP!L%+Av74>yW8VE;UoJEG0;=!rfgwCc%=Z0#R9 zU_NR#=;3rbJrvcUR2l+#kOBFes0xG|VZdy0^D+H8nNkCh92-W=6rY%yh=UWYr-|Db zR-IkOg_KI)1x%40dQbI#rv!WDY9MEp1qA4^0HU=tu#`hl&=Uf~!MYlHU&`Uo1>G*d z8^mvxafp(LKvp-gBJM7326jZ$NE)5FP#+9Pe;5od&Dc__tv2HxZSj`w0H~&_D3+l&xyREnP zt~UEVq4cKi@Tzx#Cz6BxgK7zg^huUR0K+_Bj`l(4lL>jx%DHc@JLpC}c0Zqnyxrbs z%LSMv6poi->!Et^L@aZ=<3Qo0UrB-_R?YJ?JNH=XiEan?$3|rGV?Hl`y z-_qQic9fD82V3#`s@B`4F7x9J(q44GbkE?H1B`&|3@}A$w$>5gJXiMMbNo;f<5_lV zJ@M?jLP5qIYiq(dk^AxSfk^Nz0(@V-E0ZV* zS1p50JChkj@a&NKGro)ipq&!m-S0t&1L((vk>Z+jSSLU#Ns#(Pv^f9*|L$myT;%y* zyte@GyPf&aPY@r{jV(MOt=owXtL%wAr;f=1Mb{_0f>ZNzbp+TjyqOBh=V$aR1Pkk5 z={P=klhrD<;ndWMz>3luK|AmMYTJDuovt7_0nIENn)DGm%wbr) z?=8fCf^R|&4vH6&e9-`GU?t}SGI~z%cAPR$+p-8lJ10MlhX8I4*3dQl2jQoizbgme z!a_gj0V91CRJw-LEzVbU!|lNwG$zs4G^IfBE{%2ljWaD_*uQyTz5m35arjqAW*kUq zFM;65$1N-jLvpMbmR>bRWx1CjDbHFtM>^>21UYS)E)7wieYS9&1<8;8w3-G=Wv`;T z1b-PMQDzznEqw@h?#q_{r^vH1{huQLU(BV}#m~hs{C95OAVE$}`kqSi(?*Vzmn}s` zD~J1x-};yfs*F{03t1j1$HeC&B;xW^Y0lP;9}@uxa6FJ7kQM(s_d(M$#a3Gy2gS)v zaxU}$vdQ7)F??tCi1-|FQO2^HThfeQ4i0+L>BDo@#l5={bLpSJJ5mt+*vroYG5~T; zcMsR=bA-PZKMIQx8cTx%Y)h>9)Z;{5-@O(DL7hc*1!*oH=i(zQR7mj7@2A(pyy7j@ z?0WRWiV7!!rB-_UXKyGCkk0h;H+FqH-OutX>zZD1;m zo&;`l5N#w=?nP7q7g$9OG_fY`yluWBTyUa)34h%sQj)wQuULM#)8pxV`v53}mLKKQ zhd9FYuuyHVH6%l<-gkf`4(ahIt?D0q_JCwi_?=mJkyv*B)+?8<_dO2B$N42T_T9P3 zhR=J%v4KhwecC3QYl4HU?^Af+sb>Cq;73X^NkSInjY9DQfkdSzBN6zl{Ew#zGA7cg z$#$;F_7~htwZx&jnp_UIJ)pI!6?(!z-RvOkT6zdCw}bu$Ts^C`e9~nF+Pe2DJo7TM zwDqWUf~D>juO-j#>vj+PTo>I;Tj0}#h1<}uvTy-OmJNG7U*jxo)$Zwt9gFI&(s$!< zXFAS;=W9Ly`bpQ zviWFx{j9j%BRG1`ZE!QGsa|!}`5~a03g{NS5}-J~@ocJ&lH2r@fjGd!m~xX0oV}hg zFc-PyJd2VtL{PfH_^?g1o!!mw$eyJ|0Sf+w)qtt#LKYLZ2wc|d!Ql$6v}P4Bm=S)+ zO2%Ft}TYG)qHqd$%1iOo_kXnlR`Fx(Uvek43+vB`&f|X z?#E)XC~^E_qRuMS-x|3zQ}1 zH_3Z}dUNg1B68O|U~U9wjBiOXxGu1uYP9!ytb|8ciFy z@n0hUSl0}mmvy3l&u!U4a?5qP@v()Wi$Kyc9C-e*617E&hAP+tDOf%=ZV40KBhiSi z;%C-8sS2GCl~*`6c*P^`OhoD+?L79~Se)K&)ius711KpiKF_ts`0aG+6g>Q@?9|a3 z0$vu4KeE-X##R;q5>Gd+9PV1uoOP^F&xVKRp^6+B1ktq7pn@Jokjt{~k85TEZHThf zM1-}UAQKJe!`{(s9J{0JmC8_IOzGkW_M=mYhh=7dO9dup(2GBS8sc`dShEfM7au6P 
z3@VD+2}q`6je|F2+p3C5kr=>c5EJp>Yj+S11r;6ZN~e0aRo+AY8%an6A$eE%r($Y! zd%crAA2!2#*J zIT#2`W}84*kf&b24Z=;5A&I45&q_lBa&1VA4}k47@oT;79;{{xhAHq(vo=p@6+AVy zkOuOc*xoX*5Y0-N7RiBHZ^bi{@0EIBzlJN*yc}*zj<<}zRb;ksFBiM7Tm4oLG;Nx6 zL+Ec7XlbgViR;vRqd~euGlF~l33R9!SMTVe+a^dpJdStrl9R|hhFoGx0g;iXIXD2KS(Y(N&=uK=r_Z7N zPL!Z84;$`y^DSg*nW6J49|RHy$S-XW*+85WQ;L3MU>d5)NYoRZTkN9mh7YPk+PC;k zq(R%Lf+~A?O)3P~pVNk9jxiwU0>Gf02@pUW5+0R};)m(&o}22XPZg@7mX!G%*gdl9 zs_9YGn6l*RdwF<`G~a>Z<%C1i&trAqCgX1Z_a}<>JLXr6)=t_)pjIxf(BCFPNe9V- zxnfv&o#X>3bqN1k|2&!PywT^%@$ne|)F0G9p=Gz>V$-99r6W}oS1D|-`G47|K)B=y*tm+;b37!bz1hm^y4A;qsn{fVn4v%T=^wh|%T?WcW_I@;RwW+f(v zXs4IrPAvLt0UMe2!9UamB$p zB{AP+VIXyaQiYw*3LICFC}kS!I(bSp9~J*f8i(J%UFJOXc*4V!`vPqGY|C$)Q6EC5 zEpJEAlp$@=FgVBD@}GJ~y8r{nU;lKw=eg-rtpJnm51FW<`DFzVCqGBLyfJ#RPep=o(eWOe z>iEP-ZFX0=ZqtxubpwMSlxsIc2bu1z5m#|$Aolz;DGXN393=n0gZe*l^M5IqLOUST z08JU^pN(hlyT&c0D=nsdywxJsrX0)DuvKO|7wd$nWklneWU`F1dbYtXc&hzE<7FjF zjy@Qarg7fdHo|h(Q~C3`Kl;JM(ZB+Ou$L?{5!*9;8NNyKi?o-v)L_XSJ;lyg0P(=> z{n6_J1GnWOj2!GVC326n0`0=U)KpL(03NeiInHyM5!vB5Hc8q0U}RuyZaJ&}xI-cW z%E$gn5(EF%1lipA=5xTFQacNeC6lpWqcB7Z-GE|FvAdNTdAx#hWJ+Blic#Ufe&~mY z`C^AeCNcoQGIGS@i#!5x|% zu=WNDd=X~;)y^h#<&R>?He28%XcX$EbsKEvWXzts%i1hRtj7)GLPRs5D{D_nyDXb> z2~ei#qiO*vQ!J9HQ4mlPn}eS-GGvH-Urq*I>nRjyCf$*IiM;g>f}6Lsv*(K%6Pb56 zZz&BDEAah@V8+tog2M@JUuA3sd|niX=_T*@MP>2*b56JJ`?q10H~v>cEd*}><;QpL zFEPeY&jydO)#)@90#!Br3YD4@X9!*#rLvsIBH{zplKQo(gj`}6h?ItVbz^?P*@m7_ z;#HqKGV$1*PO(RPuClxXyBj`w^%}^uUd2Qb2@aP+#>S4?6euNOr8!U!2%NZ@KbwX) z)1%rV#z*@N1FTVhaO|0`EzB~WHln#vQWdyJTCVqazBz$$`~E5Yf~y>0kQkR%H5{2L zaVD33jP=i{*Z;03#UuNGF5o-zMgF4yw{Ms={XbDvN$;f5U+t{-JgPAf{$SB9YE5NP zf<;B5hW+TfB)W}6+IElS0Bop2SF(4pbl!Z!pu`-7DH7xmlVzRq`5Hn^920HRn57rRkOUiINVk*`YV;)*B`tQT@i_b7~ z9t3FY7_>lHERifwZrQ<{(P>dxp2-rA0=MXx0yF~P)=xXOB4C?*0DUkfzmi-wliiH$ z)3TkyN^{mkeslhCoB=47%1S`+3R-;vFIGW-Up|P_V8sLK^ImV2Et;aghffm-yK^Wp zL}Ck{OcrsuK>II#h(6(MDGuCO{{)8{xQf~<>LGaEhE3?+TW~`qN;{C z1qs)LeXIf1^yQFOfCn1eAN&VQP9zj1lex7u@ZnZ_D+Jp`?k0aS%^np*qDZyfg$AK; z`vt>B7&E5UO#z(LmAI#WiX8P(NdkwjAm`=GCD&y7)!-f606T&zvK@lMP7w4B4pVqJ z`#9DdKE zTl?=aay3omP)bpYRoq7FY$!!z3#na>O1WHjk5$Qw|3LUOQ2}H?MM*?bmYQF)?h)f( z>Gg-T*X9rUn8p=c7~P~k`Y6N1qn24YJM{$-juJEx*}J{&($jkF1B@WGz^k7yn8<^y zU4y>sCLxJXKoTePQkQ`}HhYLjh~4QndXdqSbnFeo*6%U6S6{m|<4`olRX-B2Qb+AY z?soy4SWE}ZEp~y8fC(+;2pEM=hd*YC^s8i1QoON^V>xNV+xLkp`_$CII`Vnb2Nq$B zcGP@y`=>AFi$oP7MYrnAR-y=QATw*LDmG!Fnm(E+h_Byabj@A#A*kQSZtpohTQn^UX){ zr?1N_2aNb`*8rQ8Csq>WCAp((k5TDc$7>xPq}I>0S{wbbHV3Ndj17G>S$2fQet~=) z4q?0lL*#!D^W+WnQDg1Rx1AqwTSK!S2dAsw_4)haw1@ohryo9CF~#DX1NdMEjI;zC z2KwB5lb-rCzhLSRp7q7Qh2+=Y%UExW`iI)~HGt?|szvKjZJlU<7f=A@x)5LM$UiM` z>5w_1x@nClu+<_&rJfL&=K6{fEs!9vzo=ipb)}L9gSC`c679w@wh{ld(hsEd@%NQo z?8l|zSq*haFgNg2c6&Lg=3<-nQ-rCA^8*v_H!BtzcT?mm3GiP*Fk+%g5(PnanvCcQZK4@EyfaQ7 z`T>iJETZ=Vt%DKv=KzQDg;jj+Me%9g-T__$tNMFNcp(h3`TNRm!TB@oL@;N@R)!8J zOroP8EIB_{nZ%PeM^qR`&j4U1A4(?CsU-C7qF8bwxHpl^nWF+Xj$XrXFi z&pF$uOtnis7T*aFF#)VvfEV$pltE_w(I!nK;EL1ChyA38EDBQGt56A;;ulu3Oh!*{!s( zYF$T7L9$H59t+LEaYF6x4~cXafR;Rco&u7&t1>daOT-vi&5b5Ufpwr3vFjEcdP(+n z5h)Y*m%=TZEH_`ix=6XET4CMn@HNJP5zX+IEJOjr{LHow(<2Z!#E4BcL2c;N$k=xd ztxqmVaYqV|u`ObSPIpc;39BWzvcV1x^#| z`VAEY?@_cf4}bTY0D?UcRS6)c-+0uIA{x$Gbl89BQlA9LQaopoQVv0`{n>)UAbE^O zF7el_@ttJg2MY)$V4xlBQoO{ZYa#6Fm>1Pr{O*n1=a2rIec+xnV^No04ZuN%pmPJ`JsHmog}S3D6J`gZb_cuP5LSV|QRvfbx7eKJn3ZJ1Q81_k1Ht z8hio$xX*dXdSy(Zt0hX5apN=ObF`08U~P5Iuwr1@VmVgXfJkQMPB?`JrZ~I@nUDN0 zX|HoC=)NBe%XnF^4?3GII+>vnp`>g#>VcGwhAeKsu;WYn8Zi{Kf0m}Zn{qG>-CD(F z_$)dI1|nu6K%PKMvFg@R)&fo7^XctzHqktp2{U8TY>g0DGo4@JhGm=|3}c6oFLmG{ zCSv;?wZe5kTM`P^)wK~HI|UKs78*9OJ|L%n>94gofwzOdJ)+^(*lZK|TO#s&=$7?k 
zzRJ3F&?VZ38l@CO)Mos^zBpf6B)J_Cwz}Ao)0r$fKp;->Hn|QZS2En#YhPXHi8RXl z>50r>{6L!gd9KG0lzq<-w4A%&lm**&Ar=Ky#ew=>=JbV=#L8HL26!;qujf!p=T{hfZ*2Hl#iJ8A2s#u7Av@d7kIV1Va83! zo%iq*fQ_z6cP`-#h8T3m-Srv_Z}EH%pA;MrCD{EI_}I7 zi_(q-4~(R>_qc)!V3I*h+~#K7Y7sVR;?s*xpLBDhB|&>X)}7Sh&04;GD5x28ax%gP z@T4i)rdLYS7zRFrMiyf)wEMc{D!IiY3giF*pgUL}fp><`b32ZJn>Wb&{&-t|z0ao` zD#8vETO(#@sxs>pB#b|02R?75D7%9H$tou`b0+kcN-a&|1^xm?Q#M}a9Gy+`R4-r+ zMrx8%GZdnZcIk&Jn}1P!8CZsFmzfm#Q|84%j>0r(y&xGzAT%GDwceE3YxYnwd2!VU z5CW}tN|SP=(=uowSOi%P8+`gasWpHYl-G@6X3q9J-up{za2yk+nolw=fQU(%DmcBy z?%FU;VQ5E8akx0yC!Zt@&_Di!&fv zO-$90RQ?R&2PI+*`#iP@EJCfxXOxaj&?g>?$=>*Nt;8iGCKTcL~lc zzQI*82dl2C2oGc`0%mn&HehI@`Nl$;`|BLX)ESlShD(6P0%~e%w^x3*6A6<#m8``` z$9EIw ziL!h9NGDVShI;#=p{`s z`yG4tM1W~PpX)=sJNjD2s<^#}?>O;lxSeFX3L7!7UV*?3qrRpw*p<96_GLcL0(8e4 zLfw8at_PAMjOr>!o~;F!YGA?+7=)CDaJ2-K|G6j?c*u;&+U5Y$F=^v{|o9 zw>gsk3=&|Ivz@Yyp;~0}ku%ZJbiE`Ma+?L(QFH&^fCalm{t^T;iG|=k)qFds$odqg zn4! z-vY9j7THjUWz=^((DcX!_;_Az>II)55|1SbGj4$%U=udr5s!O+CJa1?tzOCL8%fau ze+wZw9^_3#ZyjK)Hl^RRZyzfbKiigT-0VLq2!ynT=njR&? zNsFNBhM1o%4tTmAj+oF0Cv>7v<}<^>+EmI2m?h)l@(`Z*U8CWScN#l&%KX}3h!=p@ z2Z#Ru_U+j;plBd$Kk?qpKPd34|1W#|P~y-lOa_3$Arn}0Dz-^Y6f#>I7G)Ce=8=!; z6C^|8)+Xw1=z)|;XI_}(?74t->7Qe^5sC-Q1cNY8&R2^@4sW@ZfK7%aCxS+}4S9(J zLjn>59SVX*s6BXmlWZ5`SZd2AE_wTe3byWIp6piVqU)ZX=Yv- z9SRWI<%G;IS@!Ve_wgB4k4CEetHDa!w{{2r4$>v-d01=EtRjdXSx3d>1auD4)bu(3 zscHK<*ejNirQEfyml%A~)8nZ`!vt|zAfl86aa9XbmQ~0$O~L(hX45;)^2*xNSGa#n zXGkDcBE_MJ8^Z-C1rLlmBbI5&5W6gEbPG;(lrFJuuB2?%COT;q^E^08Oc4(jXtB7YO zJEEY$GJ<^&lkxt^-7{+?tm_6ly>V+%giJ}M#F+;tg^K|Ys`WWZ-f3%X)r8(wU4fbI zHq@oi=GIuZlZPju#56{;vBZV30|}xT>2fHgV3A_=GU&SugRxf{kTb~V(32EUrYYbw z`&lWnBL#m$F;nW;QP}S;8GLY*mcv!e({Hk+cL@5LWnDc7;&p#sdq->_XS_!s5cMlX4-@qrX=`T+_bGQc=E3@ zBb-5T%%GGQm~EWMhhuM6x%zy`z(>4NC1g)`wP8B|N6aqcMX4Yar;@T?*o&{VtD0Cr z66Qz>&hNjhNdd``k|=H)2Cbpg3A!6)rC!9K!9ya?`!5#RoN<1h5@d1+@Xsj~%JTsM7N97OgY(-I~RJLH=V2=|?hz8F-S z@Lg8`iFT^Q#vRq8^e?mAl$D+PzE2yM!pS&DHyNFf*IMa&+MP=-h|2EGxSGvaRSULe zBm3Kpn!25CA5R45*hO0U-j9Y0QB0!1^Wc1YB?{;oDT6%i^O$1Q-N^|$rV62+$QMFT zyY~(}_ZmSq>RVDBGJY|t@g*aQelfolDDhH2SIq^~`7=-mA?Dl{Dm%cOrASzvZ(X<$A+!PsZjKVz)Se zLp89-ONR@noLHhF8XuUf_BurqSC+b+JrcSxNz{>GQOE$i0h5^J@cRojy#!qXBhJ(5 zVdscq+ZZe%Pa0ixC&+0sq2+x(F2^-%zxr+nZ3k(BRl(JS&fAw2cPVNq!s!)=>@o@u zUOIO8Gr412dd4^~J}e}Z5nO_*-w}8`}x;#PaKL_35vd*^yvD@T1|ot}R3@GgpY$CuOV^&-Q93NH!{ z4S9dD=A!*lDLCi%PHsZePCi7zdOv!&CD?FMi+4v1-Rm!Y$4&kCb8}EcA9woARTB!G zc^({m8`tGyidGh=$3yws5V>~1>U!P<&6wvxL!)H(3GOeSYL~=zr*cg>V1hRP-@_hw zNOWHJfAJYj*PcjAYn|Rw8z8M~6eLGJ*v4xo@n-OYT9?Pl(Ap|`*cuT7>se&dDL0@4 zZ6o1kQ-_?EIav&A#$2lqYl|=UC?@+)?Ug1uu)~Q5gY_21UXS5B97NRFt{xnoO60IlCDzAdl{PctoCWRHEELM=}K>G8NCiZ?Jbvyjs&ger4AC3IAUm0cKCV z0!B-V6#+E|Ttl7&Sw1ioE@Yu#=hoHA&+12!8b@nVdAhXt6OoCNCnQ4eMa0$~FVr=V zrjAeCXxPu`(`=T=V-~0n=;vO}9H)?QzwVtM)}cUDu^J;SWjjRTkVizCS0xz$$`)|- zdt)e({)o2ax}ZT5V}&g+djbBTz^qz~HIrsm^g8qbWZPq#v|4fvLF=yR0NHw z0;MnkKt(bPmxzWr)P2_!Z-RfmvY#voN7c+w{#q-I`0*uT8?0n8c{DD_@g#O)JVMB~ zicVT4iAC`Z8VPs$Y8vBX#0nr@?u9b%i2|RHq&uvlu!IR)YL2AL>B9eJ!J<;kQ(9S$ zLckaS2^4lZvvB>u{PoMi;${D7otp03=MEfgS(PEJi58n45VbC{130=rg+p865y z_j)JgPg*rSN7-ow!D8s%Tj}JgSf>pYM77;rl!uPZlns8d?%yA1W}mmaKWyDU;_a<( zfX7k;l?}D3#7!?IUK7Vpn$fief~$x`u*J%!+!M%{dl~5SD)?|r6(Wsmw#%w^E+nOo z(sUC~_SXT(k3(~>U2FGGZ|+G`^^ItYeIGHwwT6ubz*H#caUTf(fEkdN(j@1%1U7|x z3`#KrNopGn)`fO2kI9v~q>tQ;h1_@v=+c+s_okC;d$1nN*Ou)d4s9~}3O4;h%)U^7 zFZCYlC>ACAY$^&Kf(aCGML#eZbGYtrtTf>&s z#D&LDRKY^r7z9!OS+b8&eI^rt_2&B3~K?OuoRl zYs5=?5Zf!KDUUNT-08Wc%~l@q$)~>N>4RpcXsHG8IVA2|v;=&VSq96!a58|HF_i}8 z2P%&KMvCf@mR;~0B>l_sr!9};=`)!t6y3OLkk 
z+&p8>=_C?~tTb9DQIKGnWvJ}Lg>@P(`=JsAR@;dH*2Hc>w3SN9;>{xjT)wJWsT--7}W}-24Dm-+o-SO+KM#A<|moj{5bIZ`_^jCgK^iwyJsh$=AsTaU84H9ZZFEZuBgbe;GVmyW=2CUF57=2(WvZ?bW z+#yy-L)kA`d^`(PZt-JP)@rxmfhln>^% zEfM)kQiuggp}0iprt z_v*A?ZgHz~RZ|*MIvKw|2|gZtRNom&QNurnmynX<)H1z9a?y<&!4wo;mi5b{L#8!# z*q}x;wBMN*3{d`iqo0@nl5aS3l+a_Hkviw;%DQK@Y@vq}N!0}eCR1%%lY(qm`4-oB zKq3aX=eID}g94xTO=HAwqndC#@@?q0u@(7nRzP774U`czbX&K5E5PEHMn4~{ z0SRh#?UMiC#k)VY!v6a+7U7t=p7;FN5afm2r&)WX1K1ul$cVpTf|CP6`+;&O1!7SQ zdRiB~Sw4&X)d7Q%R~pQ~Kl}|p=pwu{Mpf6F@mYlS;4^$er1VuaV_*e5)}}%U5dWjT z$Sv7Mn6yB1P?WR}-k-USg*|N`{%5XB(UAF{fw%WWE0~Ni32z%|=14 z!I1V^+n*LIF}wQP*Mc+oLz62mN(_F&bB@EL*F*PqNLFEVix_Ya&34E90yPV9`!JRGkjiCbcq)z$D$z&kfZp$1^{8E*`*aO)txC6MToJTQQdkMIH zp+}rp)w?|kXjI@Fhc}tSYrYW90!g&ZI*>u=i&Qe1%Pny>^^Inw~ulwga1L=h)WLhbtazSI5TD| zzIQ$x@V_hWlmb#-Vbfh9mYyk6T}w6sViEQX(n<2I;a#XxWdsy_pl-Zx&Ii|O&xxxs zdAfXv+z$212sb3Aq^?)REh}@B=T)E;O+G2WTSb9}i>dY}Jf2vo71Xdi`G&TMx{X8|B)uxD$n4<7j@% z*`D(&MI>V=WdL5*Lv1(lEVpkyxaXag{+O-sE+)COKnmw*r~QpQN-NDJ(OwCGB?lHk+y|h3G~d<|hc4rUc6JOAnO%Sp z2$kv1cRsY6?G>yD@zyo6EpYUxnyandszM@rJv0c>4F0lE;;;BRdw@E0-F}k&3yK5f zK$1nPp40Wj`yi7EedH5gX(rk|8aYfV#*+mNY?O{c9b+;w4vddPdrULA)jTPPSsvOP z^D?qW&Ng73!|e22uQVBXAFQ$xEk>{&P)~K0PM(1g1 z!3FXBQ2VaP-0pL;d=8BI*cTMkpMV!2zF^o<4kp0R!Mbf20Ld+Dl|bKD8uPSh>VUhH zn^_#;wSfVY=!xj1RziVdnRVGiq3g6_a#zYsmeiujHEVvEM3~Ub@}njm^ufmBCtCjH(%5biBm zvQ?egFBwqsNCm#)i#Xp-hM6Y zhHz^kQlC&r$%>+%e7%bs){XxCv2bLgq|*i4ZLxmfq}%40Ufqu&79QR>YY`XX(CJdO z6u_?cQ>c4VvScRfR>@BY79JRhY)jUTfSj(7r5NYRcNz_pZN%Xaj_O#~HF8a3rei;h zNwgwB6$7%xv3D3IhLZxl{jNKD0^Z6ATsb9>(kRt3au^X5IiI8uR-9d~*#s2>8>Bze zn3}rj^b6L0fHxre*f^stn377~gNKZ|h77j%V>iTOUo%>>B+%X;=x=1E4x;G%L)zS` zhJYsxSk}c=YBp)3Mer^J``s}f%jb3kVFVLk=Bc|=7rJ(!-;vevN!8+=n&HW>lYa0e z{`K+Pp<~e2nz_CW3U?<^-KH6&tyJCXyzO=!Y)BW$c4Uf6T~x_=KawQ#FL~;7;Y6Ef zAR=`6ZxRKhTGdjvEu2H5mhdI!AxQjh&$Bc_$SBvKcUsSsaQZxY)wSL3qsP=M~L^Zruom{oj?t4~AH;sIZAUvAcLqSQD zwfzz((Tt4xG}=e}U*c$=KmsFY43=kg)GVW7ha6MEq>eNQA z`vk)lS&rg5T%0G=PZ34x8$Co%>L&L2<&M>xr*(#c-;HIiZsng{Nxktxl-CC!0zX$n^sRpTb226U zMb1!8^@Wc9oRNL$XE7v$5RNc$0m^dvW1qJOnTwy>1HDKy(Rynr*xrfYO4gyr* zk=XVmJ2T0cf)bqsDR=RI4I2TE7JIckj)4OoWZ#8S(Docjt<+%Q1tFXqC%B-BDiWOl zxSK4XbRep17&MQ0w`)7(M8X3Gc=b_a>y5Qguh4!tFGmR=nsQE~@+kQfz!rudnwF3! 
zCJs*(OjoHZj8K1MY7GJleql3jS_jx(X-iBTL;iss(B%#n2Gw~ORU?3AfY3hH2^o8I zQz4IBjRNXw)?@ql69^UrWFAz)5oLf1H*j_jgA3o3Sc?GvQ&m%j|G#74^}aftW@ z0wwBd1%!eWi#YJ#SE342R8qI~Xuum$}A_OpSOYlKH2 zoLqm@dMCIKc}D<&Aj@RSQAU)eoABHL$GAYgFn8*TQtkK~o2>X|ATGt<#(`ofnXmcf zd%~BH^uzMCpbsr~gMSK;M;CGeC=O)rrQE^0wVAI78u|z_ zz(0;B<=thBjA~I@QgnzT%A8c9w|j%6ag>D{WBrD&=TmWv?azVU_3~#jMaXw-oB2q# z=IL*rXgwWrO`qlUW4NdF;nv$?9*Q`AicYK3_^C1PD|(%~79~~4t{+Dpmcj8cmF@In z(I1n7hO*PfV!-7Kk$(72#R!N#L4==RZ>CD@X<)-awlzePnE%Q;mI{^&&VwBdmi-C} za)*tGA0*{niU(F^@(u2f&X4bx*^)`|c z3_$^DK{D%(S>}3|@S8RmqJdBl&AX~e)m6}(7svVeV7oi`Ll@CBI!kHaF$!Y<+F|}B zvB?(eai+MtBu-q>&%ZMI@&}b`At|G5xmk$<0ZvY+A3Q7y<_9gIxRkF54?@Yg;{r4@ zA`@(YM77!Zx0kQC`?5BT)g-F0q}t^T(-lDHGy**90Wf`hf4W_jVwZCer$MZjpdquK z7RwTtaZl}CHLue{!ti#Pl0hNFe>!Jv`ukNn&O#;l?Q0mQI0(SfbO;)q}N+_*8;WD6kN z@Aw@B@57~Xr7Q=e1xM$n?+enkvhULZ)lh1&d;gG3EQ|mnW`uN%-^NC+pBfyB#KNz> zKTSRqT<>0wbZpT{APZ#0R6mJxV!o?Shsp(8BvqH0hl<77WmB5uk{OA|Ki!CJ{+;@0xxJwm6H$>q&lMAzSZSlI zo)ia^$}z#>>M&7q7`TF;1gbIBWHN*?^}^POV`zk&d=6J`5!gTP-L&hw9d@SSH3WG= z>D4#esF!*BVoSri$$P)pzjyRWh-zf?4!4bAf-suze^u90fACnY7%n|tjR1h5`h*?+ zYv(u_{{!49>wwY$w34=3k^FWInBtmJ@GvQW*GeF*NNB1OmF3+&jkS*UXB}8wIEUaS z77`5d?0|SfxY$!=+|=|_3B#>e-(KGO7uUP6a(deH_g*ir_p@$NGML1xm3`dWa+&PH zO^s72rP`O9x>hb{K1RkY`j+3{n``ggyQbRQ8Twvx&-Us7(WL5<`DdwRT`2~vYM9KygDlKci{7cK^@Uq_!M%Jmv52IEtm&aKa};&^r8k3sQ*vLAc3lU)FqL<)62Q*;`UJy;OBRyh#_g*f z&#La=Ai~QSK~U_gJx7sSudKV@JW3db0xpE)R-BXW7-b?m-I-v)?74ja@P{XSU$qxD zlXrYJ<`UH_TRpN00%mc3p~~E~(JmPOIi?z6rHch167YSuQne%sD2?{66l^=!t{=&Dt!D2jIA^HMm$i`9@Jv~o1o{p zXoiMXMl5^g_9_=$2ri-^?@2{b=zf$IHeQ}!Ppq+HGii>pA}!pGHl!jDLn97@=+?$} zS8NY(djX5u%y;K*C_9zX7sy8Xy{;*+W8=x(2;fLSznO%IcGd}do~&60))5TKS3qFk zyqE#`W2^k}^2OW^k!be}RSRqo*rtBbxKY|g=3z1o#;Wu?kmQ80TK)y9m@>ycA?FkM z+wNnUy9VchM+%D(NH8ELP7yf zQ_pn9qmO#-_cu~QKOTK6S$d{92@5~ArvCtc_)*JlLxmvC`}DiBASQ1FwvK2&$uR&F z>M`bzNAngduAm&p}%9nL=Rx4jt# zJo611S}1oWtpC7IG0T)YD^&+J;u6Hb@bdYZ`>cWAX_*F;%F=%x;tR*#HzOp8tH8N&0KKf!-b%MaPuvt4JXa`zYEa0BOr45Oa9mPQqP><;C z8^!*FF@BF&V}t-A5+EnhM8g?J zh>QxB%mB|2L(-LhIiPK=h!Usq0#b$Ee}H{zJyN#Yq4dvX8T139D2otgVC4bw9WNR+77P-ciF*tkKAMwIqUE?4fuv);4JhV}GF$n{`+xv~(>6h1aVLvr z1__ic;wUC0kmU`7G1B+;f};cWqM-DWebRX<+0YQKwHhR0QV1w1x{O$9!i}6%e?sW8 z*32K$3aYa`gqq@$p8=lM`@)agRR^q!Nhl+wcu`*A$~Y3$JpLsq7+>f30Pl+ z;w}%L@=z}V`V@J#>T1Opasas5e?HJ(pJa`AYCj%|o!}f&o(DvjB14ujk*QD49fC?h z`|1;4LL%|Wzd27!mafdkcf?kFHV!0e|ALi)9|6b9Fiibpmwkv=PLddLI&w(>Gc(MC zRVV`7ZmtOM`~IihHX8XYfTA_$YoN^%4P;4{k0E;?Jj{l6Sr$ev*Tsn6e--mE2?$#^ z)qdHut6ZcME?#wcB20f~<|c0DTyw&?J>SkrTMtlTI3L@=&CYNBqDmh&&t5&f^_)Sa z?c+W3Xs3_VcB9nNt@TX}o{Z(N%Iam6-+NC(M+>!H-ActF1<l4o3cD8@ZZfehBS_8m45}T0=AjBy>bdc z!~Wl155qVlvxfav7;lL|{))nMo8I|uB~&EXEoO5c#@xipN2zS^9R?aQnq{$HWq#I| zj12o#O_~>BEvA7ue|fN}P6n z+lhN@;Y-C_0*lXqJlu3q>>*KcNxW2TixJmh1}Gr^^_iXW^))^ZiNi9Yxp-V^2PvBe-J8UJunE}W*oUh6GsOU zO+!R~_A{zn%g(qRFny2b=~>kS@OKk$v{*uW+q!u}-#*E|5N9uu*sn2S0x*V+3NNAY zx-8o8g;E!Q&rU)bDh`E7ujnCRDg%ho9j)tZl+KHdoNq$6G_q%+!KTJq%ddVtqBTKB zr7)8JtTlm?e{E}m8_~9-WS>X6))Swvt8(kVbW7e}D(P=Cqx}@TsvXPc=?USCRe;?^ zLw)+MTRYe4m>=O~2tB7uK+U-QKBNzR1g_k^cvsSgTnT%Enu^C1ZJt`(SP>>QA0FU@ znw2G{?~9D=DO@2y05TKc-S{FV9wvzBWpjp+nLb6xe?yt%jLZZhLs70mc_%fbfs^X) z?f^au2>c~JdbuE%(sWjT@mJeqnLI|b1&`Bm@D1=O-7iHH+Kw#Uw+at zAPkM*fAknl30VRc=l&}N*mb`+1pY;DqJ~bO6q16gNo|maioFwrtJwDsMZWZ1J$TJ` zD|fHri-)T12UJ-)ZEYkzYA-SWD$=&w@$n%AahAKoH%EVMb1o#|T5oNa#P}`=l;90E z>-z8Ajj--I6J{RTFlBo)oCfX6|rIhR{XbE(_@5o8*NcQ ze__WTUH2{&gHk3+5FBNYMIGv!NPD_*V!l96?e{o*v`NLVyqaW6;?J?cP#SYPmvPqDY|D)K=@bdfX|!H=dq-7W9aKIawb-Vdq zqK^c=ajWhpkYdK_jzCv5$M^|1r0N(}MZOo7LcZFL9YmU=r390%oAc|x-n}cWBsH@= z*BD**O}hSWOShe!EA3biW=!?!_I9(bfi_CodAq~X5i5VNqd8pORPBEKtt?N0Nhzk1 
zIXaqS+Q~fQj27MQ+Q~L|*Sk%s(snG#i3kkje|+ML{MGxa`SSd?`gT(-Yg!Q03D_GM zT|?!JdXEGZzKwzk?Osr!69hGlHtEYX=#r}UxY%}qC;kT>tOJ3m9-eb1BpeqQqJbe< z8{PCHH{pM=?}YC4!pk|JLqQfKDtRXhzwCeh<9(#nA63(=KW&TB%01&@01SqcF3X_i zChIciN%ufQZ-AyW9MuL&5|9-QqkzQD&4xO5S4GJPG9qEx_hsp9fjOZEGOMMItR+!Z zoVhR&RXUpzW)7%H6_Oxf5i=IZbZV^stZBSz%8-9*0e&7lof@7_iTT?@)6xdF`*lQc zJi(62o9fGY`)PD4+#We|QZdCw5Wrz{T981h8$b-`?=%G0L_UgmC z7cXBG)ex$A>h3m4JnXWpVnro{O?w}js3wN^9p`OO1*$!@BKNU!g8R-eF2cR^1sn}M z3($XJ?VFZm<{Rd@m}-4gN})+Ph00XcgmAHThxWBXVyBL%<}ml}dcDN%YwAu6*%R== z_F)ftbQBA`vxB3E#5aGipHA8_-+BuYk(C4vh~Enwg8nYHBHZc> z(U+Nz(Ne6Fi5lB}c0U{wwEjhKEWugm_3v8;$8h6&5E*<#uGBFiYvDWZO6!Ek*h#2| zH6#I+fXJYX1j^V(CB^bH;!_cg!7nxL@pd}vp|Oll`AOE7%Z$cQBQ%Ceo5#q5i#30O zVyK;fVgo*eeuAa=t0iAQP4dr*ml8ltbuYc4%6l=ZwyPrkfE9hfUk8W7*pU@=*-in? zvz@wS;or(+wJ!d`?43KtwXuCYxF6=|w6)QB$hHRz`x2MdM@DL3N^H@WrnCuZZ zn=0ILi3TXuJMh3pv{rN<FL=FM4AhCwVXf3&m3nU|dmM|8w(w>y6->ljlk zTRIksckCSqP*P5~ot$Xr38{k9_4t0uO|hb*aPN~ydrE>Vgd06i{(dgWIDX9mCBzk= ze&i8^fkl&GKUaV-IpEtaQIWAXiC&BBxx=5j-WFv9&{|3V#Q4lHz{evqF!40NoLKQx zPXnaNGo$2KfXM%HV&H#4qp5!>!i|k}DlgFYsV+CsM~1eb0uKolOzQyd0|b&s;6z47 zYP9$zOF{?3T)bn}7vRh{j6Q#jPyELFw^WYO zDGkx&j^ZAim8gZx-QChd?#C#tqWZEW@Mv5CBYYRV&YNP->PLyh=oBG0~*UnzG$OA&8ra zR+t8BYJg1O=8Bq`T3V`86mfwmV;-Q0ge&xZEKnq_7+oybrJBA^VCTS9qAtn{;?Sq{ zSDamM&(ak+XVUmFXeVPKkRr-SqDbr*Uf!U!$yNdUwEc=^M+~96V>91K3l_-{GX`p&=&c=mc83n)6CYOsB zmXgeROr7_$J{OjP@CjxeFE!0(C`=x62ovmpktU}o3_^d<9;6G{h7#@pPFIw^HFE4} zU{tAL(}^FRvQe4ul-;Fwg_19gncnb(Nd7Fpiy*8QSffFe`v<@E2^#e9_sYrvlLL zJZmmqZsL)2$f%vmuL@e6fIzUT+;!)=DZo~I>oVEWn=if`o3# zXeqrfIr2 zDLu6UqzXZFE;x!Mx!@ti(INW|f#+PQ=5Bs@XsItfYz)>IEYQ##dM^*6KU0jUVB~0v z=sSP2Qus_hfHmm*>+JhOQ&17p#89FR+4KNz^GoLOS21E3jt~7JOMl$Jb-M8c7Er6M z+B$6(BT8%1ZZ@TR0ZqP_f)0#_Hg$eX&96i;B|S=GP5(gwarwx}|M>F#hoY@h5HPrL z>N%&40Dyy?aR2q1j_PHU9nrmi`1z)y47`8u0hQDH4VGOP2lXod{Q&!`FEPKAr|0hL z{Nb`9;DQM4O*>w5Q(vW?%T0x`!ohbrNX0H;S<^GLt^e(2=PgduX~Q%23e%5nDB#kB$<3nWb&pZSC0R-6dz@x z5~ZYYni7LffHq10uJ!J{Om6s(SBVtpUGiThdY6#&0-fjcQSw*o&(=k+_2aL9$SS1p z(yOF*%i4@^2#EnFw8f-%le}wB1`nf7S4nT$X}>?e>%8w>@r6*n(1IY)tU9~mHH)L6 z_Nzj)at}=ccvXeyT{`_7q*|A~l2u?yD0I9I5?Z<@t5_U`CcoY&a8j^gRio1WYHPXpXVo#waI<4j`dHysBn*cyTS2$e1w!CswpgXs$GM-2kW8%bR%Q|; zEU2Wbgh7)_4L{XsQeFTHmgLeJZCY4kf;NXm;D&`1@!!1VNCX!O=aBz@1)Iyr8ciUK z(@FPH=u?d%WJUD7q7|>{44Yjk2n#vY0AaqeIWOxFtm+Y5j?-?=Uok;)4(hV=f2oa; z5?U>>e)#xMl=z@fe7ACVe#n!0{=m?7?VEHq2-oq=Bo|E;)}IGp1^->tBcYTOJ3!QVD6zR_L``qSwA0^bW_e-0`xUqledS^mIH?&# zMd|*Uk>DxYLtNv3&vh=^5Y2Nh+Bh?GC1B5ShCWu2ERE4-2NJj@6=Jm!IMFt;`7s9Z z%4M+#F7HaIzbz3o)m&R!6E3 z<%m5^h4@=)S*a3P4&C3@?Fwldo;;LxnciOh@%yXSm$eDXq|Ez;FyBSx$%x2))@r+C zJn(39$}mIZl#28mf&hgjby7{Z$#%ZVSQ9D5%C?_VY(u|mii$Ks!F~@4h?$~?SPu%2 zYXM`a`UqryVduHu#o<`!K`86vaI8wij?S@mj5t-=4hO(jlUh;jEwj^SNWIt6y7b{; z^tCwchu2ZU>o%NZ^HDmxpA;iDw=0cn_WqWo2Q=yAa?GdV*7I5C~KGw z5v|ayG!7iW3H9_n5BxZ<;gx2b$ie8iLtbkmkePax-eo3`U(Q8;5F#J63 zM~{LI+pJ@Uq6dz-q^3Ihcc2Kr*xgj%am>3&B&ia&qi?6~81XeE$wnkec$y^1>_?Jx zQ(ne@>T=I_YnOEiqDTp4Sy0k>XZKh;y9b)ABkb=26CLLxu_g$w_erx&BA3R94GSa? 
z%vSarBUlHoBGwpz95U^W80Qzew+PI8&{Ld^k4oIm8y`>8VNV%sO#K{1V(cKbKy!k< zpveKu|3W*DlWipjoRe*vq*|GR&md`2s*f#s|@nhQTzqiZ~GJ)c?rO9wm zXc-m~vwsRaA(dAL-e%}qe&U>Cu3|y>_Jm5|lXZ&1Q=sMNzP}X`zAT}Id}M4t*XhtO z+16emaaFez!!9%xBrUvgm$K1cGC-_;R?Yw;&M^Z?mkWk08k%<%VMft6-j2T-mw{#j z69O_YlW{5(0W+6T&jc!$`hNo{3UZVE?{9`f$&y!>Er0_UfA5X|y%I3;L?l9RmKmK` zYZ>P%S*)&p{*@<7`1m&AO!+MN97I=%GM*WUKdh4XS3h07yt(?@cM4Q+=D8Qi%^hg7 zEK@SE#xf;*ako_6<;lxa)Oqh&*oAq31hHSh{J(hkcj4qe`DV9AfcIqMdyN9Wl5_1IHd_9 zCP8o@cbc|66lKhCH`FE$0|0~-x90(%cb&{w2FTlU-CP;XaTJ-QcPsrdnN3DEPC>26rX4thhah+;e#|oKnX~FVe1HyP^`-CuqFE;X z7<{;A;=}zv+79BweXVX7W1w!>%aDnY1D8crw8hkJ)rbzAv(xY>hdcd+8-5EV3u3p( zw@pFJWpg*hS`2tV4N1$p*FXO2#jDp-D_Pi~fA^g*tP+5o%$0`xk_v1EADL4!<48<^ z%}9G*FGU^vxQt`Et<3!~@c5GwF2i}?%@=`(_Q1GQb_zTWTc%-1DON(P}oPf&FQ=5huI(HsNM;*pps?%r<6vC@-7Ar^6R+~Vs zFT%t|ZoCi^v(IE=_D{gXv_Cyfj)Vs2k97XLGyxs_^t%n=@b=IfBPmOwGMb}3-pn$Y z=C>7iSaX-kzt85zoAh1LY^%1RNpL%ie>RQRa$)XnQ`2Ozo`G}Px-1%+*lgRTTtxC z?l-9SGws}DW6XHQ<@DX`6EA_QZSSUhy_`}gq1f`>W$%}lzkwGI>&#R{HPxarxTq$x z0|ccXLy)qW*9Ab4DH3GR+~`J z!X|V90cgmUJedDTFWp`q={HY|p1|*1ay5XO{p;kE8|RdAgLquSdnVG`Z95eh&zVvF z*|;2}?;>B*N+BE$!f}VbcIaM+NwK1rGD@|S8W6V)eOTlT<_&(U%BC%rfAn0g33>vG zzBghVi(x8Sl}acUTB!8j_Ymzc(?@o1ml?Rx9BgglZ91s#y!Ytlf&LmJtxU^?M(kuM z0|4`Lo0&+Fj@i|%~4_PFvW(G7Uf^f1H4TX}FHJ^bBp>1Az);HW|t_WWZfsg*pq5xFfO;e0N84 z2mBFQyg$Jm(55+1EIwYmt+z$zkIAxWGKM=HAmRP1AAX!%02q#P`wJ9ngx@ljM+J^k zd-dY?GN&#>r4ciuXcl$(&~ECm8IfN19ceuhwWF#94`Ef9ChoYTf0tXTa#KA)L%GhL z+hve-_8wyjRBwQ8SrSz_yP!z!vQu=We^Bso}w&xCF zk2}oqhTzQ_s#lIxe?W<>OGqdORfP`_L_i_{=L@|(puq?P^$bV;2#l z?Zfd#s9gu(a!L_AI-VWvB=D$AFEVH#Q@s+ak%80R*Lx)hs9mP^dAUbqL7UQhf1Kab zBg0{;(^I44JT#bP3`x1jE1JU6(G>Cf`@qm1=ta?x(~)R{f9N7CU}hrqgcHb%CJNC| z2L^vlOtr`tD1J|$nxgwtJ-lEN;1~m$%nkl6aL++4!JQi8qS^*zG&J!M_+)ILlf4p8 z6cTcxgca(^5JNMUJ5Qfg-r)%8-IMYTraXW@Vp;}fA=3bO__E%#;6YUkacXQgn zCZE4Cd)uW#BZu9v_<;{G!kN7ITmt$h~YEykpX|`i6fs++=j()-7nJG#c2@z|m z^22?INA*abI&XAmF8ogpvv=l-X@*&19s1%Cb3(xZs|rtgP&C6EeTH)We}PWLI<4aY ziGEkpWYl8;kM-emrqq`0&RMUBM7=#kqBLi2S9RO_ zyrspcxWY=i78PwFc;MZ(f8FCfNaFEFiYg(D(D=$l(Ogg2>41sujx7fcHez>tVx;m& zM5P1TZcj)UjHug1G<4x%@L)L^Hxz@X2El`u5Ih+7f~P2WLhT$TyoPMv148FHB`_XLM*FF*%YL zDu2~lOK%)E629wK@VI;7ko6#ofDs^x;oSwYKoI+|yMhk3GgufgG=U@o*?*s}I2uRo z>5;m7+Cugs)pQrhVpVVn| z2L?-m`7{Jes`zwRMoQ{z4D<9Qpj)wOzJvx%*{uodXoe=) zEI`p%8e%D_uUvxe1sA8f;LwVSx@?+)YIVior@FZ%;ZfCs`85Z%K2$4w0^0@ELP=1I zBn44@aja5Y+pq_PLhJ)zmBQ+RLw~9w5JKP%MSZuKL6~o#DUF8MyTGvr& zq3&%DhpDOhGk9pt-Mb)kr9NE+VXhe%s&bzW_Jpz$mcXbg)Q2We)rEu9*q}c%Ojk-D zVWIB59cx!}56l%ax8lk%3g*rZii44KN`k>~SObfjy0>RjQ0l5^cw2L)fPWgxJ_4vR zQ|E)LT7w1wN)0Vlmmbh;)KvyoZF}GVw@q`sc=6(5^J;f}7hb#wn^)XT1z`V#AFd@R zZvO*fYqt9z09C1nH!w(r5ch1D0Dj{?zWL^2^W)9#{p;;rxD1;gU%m>PH{0Lt!qo*{ z`MPiZ_v;of-u`!cvH2b{Y=5usZXMmp$6suI+TQLy-Mru4?qf)Q_RsClAKreq`z>5L z6PyEjq$@1+_6DQDh|P7zZ?CU+nC)^OnOvSPmhB7|o7bP--Syu;e7OGQV)NbZ=I8BA zpFCe}{=WIg=KD+6N4j!(-eV_XDP~{*kc$TONUPDI2A__W1$^IjU4Om~o4@Ve?84?{ z_{;65cefb)VRs!V@?Tvw_p-bUmpz7d?fvWD|MBfdgRN?knBXDTpFV!PdV(7`C?yi` zG~k|M)`9T`W*r!O6|?G*S>5rMn$?rYie?2|BGk@Cv3P1Aih>}*O(U@`u&QUfKkSrR zx6?_h4!Iw*>Pc=^iGQ{HxqTq>u@B^Yh7at8>lMMC$mwfj-1vL1T|7jVqOEqUPcL&0whzdvGVB%_)s2;bOe}8 zGs55q_hWz=@RV;#yqK2gwQQ4xD zjWE@cFq!L|Fgf6UrsRjBxl~y(3X&XKm}t3$DX(c^FJv0ZNI}dauV9PC=1l^_IJU^f z#1oPIht*DF?12ToFz&!m=#q-*inyc^T+&Qj4wN#>@u|)UcVpaa;6@<%IxtzRRvrNZ zp8*C=_mu#zi+>6v3o$=l;OL!iZYiqxQ3pp=o0#;~-L^^X-R>c?GrCOAeiot#mD3g2Twl zM1HzEvK4WmDON-m)e`-$4PMYbdIW zAfcRrgp$5SrIkHqdNR4Mw2&1aZGWGNnPnCt%73iWtuby!gVMD9|5P1i1V5#m;TEgb z7>9aSD{2b#r-ypiG1hEJxx2SqUF~FKvEnNoE54@Xf$QK_(>#5^{iLUlaceEnKW|Ut zmRw>no5)R=yPnQMr%|-}If%$&P?>-Bi+rW3c`j{A{%=+TW1#Gymk 
ze}7|I?ekHovGm&sDH4LTm_+q^4h0ozYBLL~Gkbwb(eI`d6m^aKIIu;(Wl;#Wuk@_4 z5NWKD z0e%K$>aZ?5dBFW30ppEd@UX)S=)UW@!GEWXe7kQv4}lN$)nra>JK#oN-H_Td0yA9r zWMIyITj`)S|A3oDud9Hq@|Z%0Ivki}Qsi5zYD(ou)s$p;uxp24*ACyx?=O&G+H@fD zfuIAy>wZ!~MeJV-@xNc31JwLVkH@5vcq-J+6hd+&#CJvkz<$e5S`Uc*6K2up5gWog zYXP3Aa%5x%uP@kd5gN)%vMECR4!?cQ4VQst0}}%>F)^0`?Ew=4I5(HU6$B}lM2Q1U zU0lonNG2tlom}80B}R&wl7cXyXD5rNbdpl=3DTEWo4bc zEmxQRQQ?D0=BdJYkE&H&E{e5(7rv7TOV4ba?o;MpT<4p0dFQPi?jBPap{z0YDCQ7a z#;I zZU~oZ#yH7LyReKcoHD!tz@>#C)P&Jrwl_BjdYx2t5d)==Bm>b0!r4^mv7fd|6wt3a*lmn7w}4tEfYW z71O28r@a0aTL<%7(stCDi%{`0hNF~GO*C%707##rTaPTDv!?`7z)=iFwD)ejDnojZ zaSZWn5Ijs$hV%B!dpUs#HFN}H!ZcyXX_uIb19*ST#$xE7?vnm0>JL$(%c9sEhyabL z*Q9F9+3~?@ZRwL^_Kg=-oXu}vn z_P2PMbX>kO6HxHTHdzq=LWpEryTUCPOp%@fqNq4fS|}n#yF}%yL$P*nB0*1ZBmltz z>STXUR$1c3!n<8=JmHshlr%vYLP6pL1~VJ>6<@KRGu5LxQ;v!+uo#MQZaPvHr?i2? zsAixZ?Xo6buE7d1ja^m3G&n(ale9HH0FSstWkduO(|&*S^DstqCKltR*pU}X;ENVXiN#b zf1Ypa5T6oDLQLVyj3}n}=>(htx01_YD%hn37s(GBgmr}!uBG-{A`og7#$oM8be%@! zA2d!lm1`eZmFvD*RU7~0x^hmq?nPL~>|3OO^JqC^Mq@c^S@!RVB-1b!CgY4n29baJ zB)sYBX0_eeg5|Ai>lNpeC|qigP}n;8-oe&$XzDrGGFQ260v9N417&QW?1S%fX`6$s z>jr@PIN2As>vFTfFuI$w{M>%{&5tQPXsc+PG?vA#s85lRP->(K8!S0{i*g-9lctz! z>XBdy(dj!XCMfrrWpE&GpwH@B=Cq#{2`{-CM+?v4d~R2-||?Zvvf z_O&m>!8g(tu?R*ClgA_8_wb0LcjA#^I!UzeiSta-`n|NH`lS)RbF?_cw9(EfQb=<<<~*{)a8#N^tyFad`oN+v;7)R z^)#NHcZDk~@1tVMpoY?XSb=|jvj4=|XLzf2SX?4!_Vfgmlxe%q*o2fAG`BYlC*-jd zR!EV;PS`FzA$KUpK1tXMk}(znd9iW55mTZ2u^BiKD*=18zj&giB-SPkCYtKCURZ|0^;nV{D=;LLOxNaa~b!{^Js{>qk^z_XlS<8K8f3$qqdWe3EuV zMB92CpmW8P3SHHz5Kaaq9Bk=b`9q+qLQPGhsX8Uap+6QM@KP3 zMiFc@f2|*f z#Fv3)0}}xIEC({(dH5ZzvfF9 z1LQ+qY#VCK?RS?pt!=TxfhY5tia?4}k+r~yww~I;E$xgd9-7h(jCQJNr*IbJbleqw z<+7+yI9!XHugPx<6cmAHv47zv@e7-Oyrh?+n@JQ&9nmZRfe^1t3qpHH^(8!@Tf+8Soa= zI@K=6s+|LFBw;6tUU7|w@(m!=;g?y;acYE|>cIytKntiIOkTVTbnyE6H~8?s3@blB z^DUl)EC~Cb5lPLmrSC9n3AmMj&Kl9i4{!i9{h8K;!fW50uzB8-$2%1igMfEKEOC4| z88fc?sb2Vj5fi5<4&Cc*9g#za-w#K z__&3HKa_P+^UN!qpgmx?dwYDY+aY z6Iv?9yn&t$dNZmTfCIB6D^o!7tx^00<=C3r=?GNSPHg*=~S7~=^CzUM~WMI2GRhvF$Ce|hE`yrZb$_i+ffnsXj`ACX7E6*WdOw47?ot69_7o_0x@;Gq~WG$Y(H4|Oh)q-{1NJKDjj@T1>7aXm@0;8 zwMd9*p#=YAH99AM*oZbNew~V$978H5L-AFF!}-WL!V-}Rm)(+xX12o^@>sBvyE)f( zPrH7@^rKT4X86vk0^|WcQNRxXR)u1?FjmdNSV*^9+mhW;!TL_9AgRGzMQD8sEI5xK zbb+slt`7tYr2KN4UDIrb3O9X+JIH=F8leVi#-ItGHEYX%_W_uLu^#K|=6od0tWibd zx7#l?+~Gu3Z9ET+K1`E!BU0q($#8c(sF2G=bi;p*ZcNL)iE70CUr41kEohL1W^LRa z4-ibelqXVXt)IrCAMn;U1sJHBU(|_U;sxR|K5_sS{xtY_8S)brI%D|@_A)z(`ie$4J})twT%>LH9!_@HXb%M_ZFpT1+_v!BJ46i zY1&P=2%83&+)$SZ$HvkU-I z;z=N;b1FQl0{l_ou)!b7V2L9LY5QvmXJClU=M}~*QI~y^h$Trpu@dy4cxbsx za!mmu4gP+)FZ{MOIHV%(2|sJ?fK8#Dg>9m(+zJD(2*O}D0iKbK&^>TP1bD7-JPA5J z5o4BrGmIHuW=z1=i?E68q`!2Ark!z3B4=5A8EUqEqC|>MSX;&hDbVx{G?~7pjg))W zO(nl_=K?OniOe6${eN?G-(zSOcC-f5Z;nDRXPhMeCmgH32geR{y^O$~jy=j=a%ps( z`J$08xf&b!*pr@4Way?fHRPoi-oW8N$hl;H*C%i*E-p@pCav6HX*CR8?i!b z5Jw!d_F$u;Hfn1!Z}{)Q{@SF+?9bkx)7lOJ!!E^q4prU-Jly0~F<(pyiX>U?kiW5i z(B#Jx8hFETuiB&~f1~naL=HnmLr1i+Bka>@K?qbkkd|EzO_Sd1ub%WoM{?35Kre z#&&?~3qO;O?Pa$xgj2*nB9J#@V`kfby-7KseQy{Tk9MrkjUNz{qCTNu@g#dSIw0jO z@PpS{Hf>4(NG+BX&qhk1ZgRk=>kZpN!#3O3nVrR?nq@K!ltXLTB`O)E{BOEn)8-zS zt^&f-K+0U=TH_ZFy0=8n$L`k^2Kik0Voi|?>}oby3AbEPZ8qh?V^9hrc`WCDGGHp3 z(&`s4GD@1`nw8lYG;Xu9+sLLW&PNm|Y)2LVc@`KFv?2i}>NG)QsVe4xPDb!c_?RFh zE+W@v^|2vUxK>({)|0XmSzLrCG>AU zCi4iBIeR2Btz5h|t^Hz}Fzp_HyL?aM|ACYmvMH-uq)Br5Ce}k`p88)R#lk#ydJwer z6HYqEZ653+$~6a`N} zf%r38Qb;i6!tVpFGD7Dz3b?y9#Q=I}knpXgz(5s?^3rQAL@wys(8R`FhVKx9b?3xmrld0$)+*%PBPAGNBg*$w zJLG%&CmA$2g+(nHGOZ>_gvLCD!&sFd-GfEMk?OX{a8*W6Q(l)e2KbFK%>baN#lonI zb)h|uZ(QuVmw5Pc4tp(s-)ZqU9|2Ydar7gUbPSBdzehIH@|b-1{seSnR`*tDW9?8h 
zA*pd-&{<>k?vVE!F)%+n9gO+=O=nz!y=va5^%-%f;PWKRI6N>jt#-D#u#g@vO4uDT z+r~DU!~=ZM)*?eoNgGkcX7^+YD!-c)2eP7>u1=3B{9>{o62nV>8-KDr0r$(LBK^8i zFz);>-|BjKBpkf&o^b?BM-z5G=0!oK^1$SedMUuh&ZMOaGD3;SXdcdq!HQS`@!b~Y zMS=+i+Y6)*uy_BEt)+dL_9BH#JDJ<)Uu=U(0}3S(_+*KN;4qub@_>&S)z3y)@_(f#!cj#v9ty!!khcE2BQ zKYahA%vMs0!Wgx>yDXE@oRs)EBn6MH%z~jKPyC$8Bcx{V!6?ID&Wx7MkXRjQpdUb`1L&7)QChW z&$euKL0Of^QaOqW4FCSMx5E#$J=kUlv41T<*?Rm!AjxPP5HC|g$LR_*D0GoCkvyUL zZIcvnbiLjrSrYa3kE3n2mS^of{+fud1u~%7hMqTvhjQTUUCTCd8%^uh54^ss?qB^U zmUo`YVX$4ptDa3+*C^Qvlv)_CT*E?F$UD)|ec2CJNG_wHd?Wzja!K1Ns;gnB?SFht zN598X{T`cG!XL?GLv?UOPb_)I>+XWxD{qeS1B=A&#CntE31bCL1{1t0_tibQ!TP2 zU#hGGZYqs-wn5b;%7DV^#R%F$AM(i)#-o@V0FD7JT{b_So@8oTSxVSlT#^Wm2mE_C;!e3ZguhFjD{#iD*;$8C7`YSt zHzB-a8Y1Fbt{{z?Lh2@gM1S*3%Uqq$Kk^K%LUQq)owT*RcQTBUai+uXF=4TpA<-nK zVU7Z%Mr`;1sqwtvRWc)_OfH)z7O@#gW?>q+HUJ-t)Hr6`md)P6;$N}|_!vYZFd9f2 z7tBnZ%OYCGvn+x$w4T=TuUP~)8JxI{a7GDpA8JOu3|b%4%c1Qs z(&P1F8QmQRK5d2*oPSa}r*&J~0c3STu|L1^Wa*j6j(np?C}Rr_gqS%A`?(geS|Xn{ z&ET&*LK&VZ#yZHF16F-FJ@R2y%~R(zG?R-Eq;AVKs7qwNX09Y*Hh)w)t9^ zC1OZ~c4J3%Bu*WpKP3so4m945#{(7pNY>d36N~4A+HvGe3swP?)I1y;2_)dF&E+z) zP6e?u0n)1`eSaSyvnh(70CKkD(efTNCTByfS%g|MMnp{Y)eD*5gyODoK^eawCw`jn z0&5tal&b-Ol$7b$^=$o)qIHYN^+_Me7!Zav4t2;`m~NpQL03{#NJ>Oz&sVXip=#nezZM*U@cxQ5OI@$oOR%UBO)Tn{S9V^iSH z3sm_YJ0=`^prEh$N(z>t(OUa_0Mn-BUoJ(Vy++hE!L|cRoYd$yTNpKOVM2@&VWx_# z40i1z&ws~6_+7A{ibbA`Bz>5PiARbvt(455>e3hMM@Y(3P5cg7&N3-3GC>y;GE0`2 zeGD^;-Y=|HOPvcjRk5Gm(~=>-FAe_= zT$(@^fB}E=Dg;ZAILj|L_YkSl?6QkUU@~@&k7xQVsB_0a)mbm0AB-(}%L}{qP&LcO z7%*9YR+nv|?G8O$+vLVqVwo$GGhgLqsef~V5&6Sx4=njo9-i>+)y?-6KN2)NrCbE` zH+GAj!V@EMp|Cp4>7})^p(J^8tOk2SR{zK6u6%!b`1ten4L7@#IC4%L*pWAcCK$yc zOL){ndTe%%U~o+~;^sI6J06WKZ8|+lV8-}z^%KUCe+Ci0@n0;2{_nA;79A=xs5s~? z0SzM~x+QF6(Z8>;FuDEt`Xdw0vmi|KjLBvfaB5dx?Kq&$jR^Tt;8^$q-iVMY`DM|J z?~{KpB8~|ZRcF+UG#nnOQZ_Ek;4=0bO$ivh*4U90%vA`bwbjczWsY*}MX^U^f@{&)dT9_&Je){gy-$Hj%1t@U56#H#O zES=qOG``hfj_0@7oZez%=~`2wpYB9c_O#d~bjOrlXk+#zzL^q5dSX4!@WJlRixS8p zprc}Xihtj${+lK7xr~;WH#6Gw@21Bvl9FOxtevzPQ12Ni8=>J4S~^#iv04!QaQoqZ zS56#Zmw{#j6PFsD0~G@?H9407v;!%Bomby)+cpq>&tIWO8L(NA6iIPFACjy|Q?yuD z8`yxfC>V)Ogy=^m$xW8Oen%cH+nGG1doigqd3U_~_}wvyZlff+JV{=RXRDJp7gR-* z5-kLaR_ll|L8xSr6e(e;idIE*9ZNF%w)!w`i>?=`PU5R(wqTivZ*5~cQ_bCfnl~3B z+gGO;VJd@NUeD#z+4p{PWBR_lZE*Ra^^6mqjmyg(4XjN+*v`#mJlvUP%8=9~Q|V}^ zJNtgzilhW4VJVp{QpVz|^^{hsiAWB#etGx#hr7um2qLKbA4ixwy&6J)W6^+ZW=zFH*@f@g3|;v&Wm0EEZMB?F*`W!cif)(G zPVc54Q0)z5M8(0z&>8pV5BFm`IID}R6!B_C<2#4}=!O7sLG)yRs1C|=_j`wuzG`PI ziyu)Z%8uoqCV*ScUyU@SiB@7if{cPC=j^@F=kUV~S|0 zN9TMXAYTj^=n*1dO+Tf7J4=bAKgBzI^yJQX;-T)GZs==MRY*W^XmHbWMiMW}Cb!|> zEF%I!c!=!50Vd;qkDH|kc8~;>)2T~RqF;|>IH?dLXAb7xUzcE~vVE8hWo^+1xI%#Q zBey66zNW834WvL3C3HBn2oAW*!DMXm9Bn%)^323!2%e7^C{l@k1SN`ao#Ec)7U-}# zO=C1&J37na0cB-_<8o6DcKKj{YJR!w%){N;$J1q4(&K2KhR)X3Utst7HK8&u13Auv z7Yc&+;sh2+%o|c3_3dWBkGyUAvans*+@b(a1`|{KmwCWp{k+I%Qm=OTp05sZT*)PVEgAjTa7h=yRD zL8-rxC>fb$3JgqglI|#EpR?YcP60yjeG2xMbMo4AlI@lqD?5qWZv&jO#~7B&grxcf z!(MdO_PMDnn9Bso^N7TdiC$AayX zb{OGm1j2`Z7iLs)+zh3zI~_^yzx0pzorEknE5|zP=w7F>bHjrG<~^hx!U+hZuG3+f z@6ZB&hjhS zhs|IDVS!-h=h!t!$_`mLAu2@%+_|6Xxb&sVPhXY**SxtZFvq`5*!_TQsx3ATC_zew z-Yzt(88(+7p^4*B$RnJh+#2k0msu~Sh}b_&IP?WjUDBrP?~rX>^H5bd2@W*H_}5-Q zdX)HotiZTm%AAg-z)5F^O$SU4XY@K)>|km8O*Kq<6^gJ+GWAIgML85?gP%~c1o&AN zDBVESh=wQ-G2wZTb=xM;#f3^D1ag%nMfm=GLmT)<@L$+wMpbEwyTZn)4a5Xc5&mn0_-Vzl=|pvNBL!t6s_N$Z}z$m5QnFT-7advtPKhb z4xdbZh6>tylzi@5fMrT~4AB`|56!u6gtQ2GyL|ubZU}d8Yu88E3O~iPMegwiFVZ=F<(2XHgB2Np`WnP8>3o}Xr-vm}ogOB7JN}D? z;cb&bu7UN4-e!O%{3S~M0VA9wPM3jZ0}}%>G%}X~?Ew=4H!za{7%7)Itpg8#x=*)B zh{P%Zfq*DtAP;~d4m>s=1)G? 
z&Dr@AsRFK;R7M1wUBG$7tX6^1iU}15n{Dte)Qt1hht2czCnlLPE5)o7Aj)w)tuK)2 z?2paae`g%1QNRNpCoD<0lc$t_39`f4yAM&Yg_qBRh{?nTH;(lXfI_a|X&<~k`-^Ow ztK$|}B0aY)Js$U;ACh%*R}^Q78mx1CtF?@En9|afY3(W15mp}Nb=p=n$>K3!!4j>V ztTpGFi9`l#As9Da_vR`mW^Jd;Lb1rm2`vAR@&&m%VFH}=th=tv+M+6d!5Cq}qQNx| zThQVNmwB1jMMj%4J$UjCOeV$^{JU8R8&*VUz%5wvtau-Bd)4Cx_QKEc*jG$&p=(&pyWD)a83`87-$GU1OhwEoAu6Y2f72(IOr4Rg< z5ut6v9Bdl^Kb6S6g~*TbMM~G?TY9>=%Dvug-eh(0aqTRQ=B5_qB`_Iu4OO9WS46m8 zX~^!nNKqX?rMM3NCYkVdTQphMG{fgR_&QJ9uFe}Gv{xzmWK3UwERI+a>7#S5YOiQ} zjcW6{!9+qtV(WldFt;fV$9+CY+J&?*F^hkoUZ_HhLy(OvU|P1qumnDU>SjDvV>@G$ zhBor6m|4NU($FkRL=tEPg#x_yG&IRH7q=VQq-LxDoVb(4g&?hO9kNhtSjhEl1E5JV zlklHnfZ!H7s+=2taN1EvcB$a;M>?t_VJ1zOuUGg!^9^@6llpZIIv5ebsib%eVT|wd z8Q7s!+a&Q`pURyJ6&jqEg4z3pLI-j`rP<^erdN8EMrkK^Caf?V2?PGoR&5kc(=|MJ%1 z=2e5-B!L9&Z7IrQ*Pd5hd&HKHlcMvq-hP_mw3EU!05Xjaip}p?oYq^0XfdW zb8Gl~rbrAveNfyUYtTW^IoM@`L^W(Pqft+6ymXq$kWY9Ei{y~-pW4sId`Y*2u`n7lZ#N2E3mIWeL)~Io<8GG7%=aP~fwk&OQjd3k zVL(evDtNIdVUQb@x66G8CWrx?tF)mgw8NcGhK;}Gi8*BSl0Fc3!XypM&qG;%xf3Rs z)W&3|HYNH-Cdbw%PBB^=qxzo)04$}LfT+w;3~z_-WR(V-X>%Nwjlc?Dn7WOG2B>>O zZoI8(p61C2>eZ4EXk}{f`LHAd2iItSsHS2Gk{E{3p5f7w77q8BvE?qB3M^U=QV@tm zL3F;}mnIru?HO=%iG_cxb3dNFA9;q1@+@Agi-UH?^mw{#j69F=p(RczC0X3G#11f*z zSzB-0Mizeeui)ECfJJkz@-PTBrO(JA9mX!peZUMERiZn*-3wW&zUp4ijor` z4_)j-!ntYAH)qb}JLACn=mp-}i@<$vmKRrVg!CBmjZ)lOuAv$Dk&>QP(&sYtmMibq znet%;pZ~u6@#>8>{pB+BBhFwmyY7E&d4r2CUN0~HxWLiCV;&15-*97hlu~=i?&8LOyoV;r8`gfig9Rdy}OHl6S|=|7ID~v4bi=i>oWUvESNLpa~6IR!JI{Y z$fKd)c)#C%enDy(YEejN@JUyy7L92 z3mhsM00eM#ibuW(___1Ka!^iWj63mo-{GcQ2~SJu2j@4H0+t4%Rb#UmgD;!hw1*TY znhg+}^2*d(KLd?S6?b9qv$N3n5v=14mE^uw>RTxA)R#;v@QiOX*Vd#t9GWRl!{J$= zTSxkEo3FtR6j!$AY{*Cz%&vc~X#CZ8@8jy>U(|U?pEyYlJ8J$GsNmHlwTnIWuHiq1 z|J-0XnEZ#G0W$-n3o>RxgR^#=2jI!vP4=KV424$A-5A{`XsNq8+yWaA3 z1ENf;!?v!bim@UY7VFl6%qem8SS6x2@hIjo|aqu;FuJCfQj>Cg;L ztu#PqlnDt3*k{(92 z-e1onHe0^CzM+2lDM|M=woQ9VY!8(&fsbh*7cKvQ0B(9pM}!rP*&`W2{)CNvT-Rx7M>*cg6lwt(3)qB%bUmp!pLGnvr>$>y zfHcW!>OIaC*!O?-&>(>jTV<A+R78D_{DSPbZ2$mFnXvA4qKmnv_6}gM?em;|odw>|toWQz_ z^NNhAurVJ+13U>qYYcAaS{CgTJRZO$O8ph^P;I2Gko$i|sF9Ib?9SkmxT+B*k0KP0 zcna#_#ZD z2FoMZc3|i)6VRB^zKZl&Y>_Xcu)9%5RFP=-2GZ~FGD?e3L4Lyci{S)&K(*J0Ew!Hj z;wFF1sgo2tgm|Cjcx4iZ#Zy*qXs+I56^*SAxxGK2byC1d^7ww6Bw=LQyFo0$m;ZNAD317;%yo4H#hID?=G9V3l4&6 zwS`JrZwf2FDMQ@^{I<>#I9}P*Z7E!?PBKSMXv{haG5iTMZTKQW*pCC1&F6Ad$ z*BX7+uCmnW426{tHN@C9k0@j7MySXs5(W0F*|W@39x=}rG17%gY#O>O%gLu9Lmd`z z_2t5{s$;n%bi^Vl2nNj8;i$NJ%+R`7==cLPx}p^y3NSEZ1e}7&1!$H(lNpT^eV8c0 zN!%YwC$iea+iijO{zL-_WEDq{p?81cV!7#yf4(-kbO((eY&^C*=Z*IdQwMq?CkanQLL^Bu4F*BHHUZGk?KS5!+50%SV zUlkD& z!E&)mZINUdpD?|&gUJawo(`LG4syE+z!k(Wx8pXj9?En3ev`zM+Hv3sovS!(SJchZ z)U|944TXOr-TmJEEL*wcn$>^KJ8^XM9*0Bgra?&vHXNi)pGfIlmM3ZZ;MsGdmJxva zn`oBAxlJ(GUZpj0A%g{R>{?m#t>}w@ZNCpVvf;$Ix;-MmKs7MkNT%COh&NSrBYeAU z#7%-7KSyFoT+p}SgrhQnb&*|u zLGn*52?)|;fEpCSXq$f#6nvYktzkjXs&IjTy4E|eQKo+!GFx3Lj!rA>i(tMf_AaLYQ&gU){n|eec`Q=)(Pp9O1<(GD1!}PB z_448`SoJq>K!fn?MN860HKG6_Y9smh0fBP%|OVe_noj@r_A`hm~SUB!QGW z_2Jkfq*;R8%?2Tnn5!1$W>}7|;?(uvP#X8Jrpr6vbJisVITV+jI z5it>iHdv~)BeoDC0di@ukdg`GZ(nXL9oEV4f3Q@Hn`oezU;JG&gpFoOitg;DEmlol z7Ql@(VP4}HhYeig7H(}}t9(TtMYi|L8{nA^p2L6l=Q0UPTBt!T80;$l%!Tql>@KM< z=a^q4DN~68iRsCD8GZx_%IbtDg2yaLdxOY|^{jGGf9X&;-(^jeKb$12m`-}a?0lg_ zf9jME!VR<{`jPIppqraiy*8nr8P~>+fCLdR65&3&$}ZvziyTaLAx& zD}3xNechdFFzzbP@Ghq$9sZjL!tOe+e^+f?_rKi$mo{tK%GR{dY%>(dn9f(+Vo6Ik zPO`Gu(&rs2#8x$05vn6joEBzyt@Hh1X9pHLV-hB2@$c~#@(<(m#~k?Z3B--T2ncZi zHdfMx&`&FTfuoBZ39Q zAq^(mXIV@f^Q?hkgMl1D@0mOcC~_Srld=R-k5e8FMlGM_Sp%m_A~49<(K1HStv@=+ zApyX#ddh$lfC47@Ll}@Ee`PvO#|%74^p58v1G4|FM9(cbBR`(LJIDQ%&jVi41{TAEl%+OAhf%jyI7f%DesLKsin0 zBR6?*Xq$_&Z4Q`?;>z9N_@&c=Iq$ 
z`%9im-ysKnHQrV{bzp4kC(;!+7&d1o?9$%CXc$Fe;hg5l-F~y9+(|vLNiX| z-rAk}(Y6s5RM5Iw(Kj9 zS)ISkB&#Ore~Y!epqsvXcb%M&4bY(wP7&tF||D(!a%^5qcQkP>u zYt&6y`0kBss^Fdox1QYSNgSnOPRb~PeD$P|kVnO;ch^p~F&o}mR0Q5JajR8UH?BdG zA~HIje@gml=8McYVTSkh-$V0wuybsIWy++_ThZe?nr_lz2+l`NhN@8goJ&D_lYxdhFU$zGXe~lg3dusRozVQ*ji(XyZIU?9S?45i+ zM+38M-fJ|UMN*dbuK9NE7AOam)F)YmBU<-VP!Y3)C^nul0L^67%S^cz)x@CRH9EL! zM$)tUmnD2m!)x1htlTz%dP**x;X-U<)NuD-%#%s1a=C_U^ z*>C)(ts9zUekI?~*ifehX0A7not8PYP7)@LUGDvhzOcA)2(}PJ8)XGup}77HBKT#J zM$f!K+`j?LhFu-_EG&sv`EylP`1Vm*e>+9MVn)LNeds&SI^vcl$FvT;H&M^^=B5p# z`M@6oK%Odmw{#j6BjZyHy|(|Z(?c+ zJUj|7Ol59obZ8(mFqX6fDwit90}g-f9Y9idMo#UFGq&8u>`N}=fk;rIMFI=}+E%{4 z=j_?VjZ0FU=0StS?(*F45S!dj*yP6x=AJL-7uT<)ns6SaT8qhi1xpqsT1|{rkx=nu zzMT9z)e#qwxcq(o)AehU_OGirN(2XB_TG;hP&C}&f#(J93gMLcuatBX7&8w^_E81$x z!R>9wAseZ1C~Ba9f$}tPE`fi_6CA^J5nD|Y&ttelphANfm5LY>r+_L6SD9=D>49;0 z@*vP2rFh^0AZ<$PNtI7lXNV(nML1j~r->5>gdGCZN<;=Q{{<3y^32ze@fF5SyfDk*js(?^>0AcR7R5$p+fK86YtS zNIsWqr;l$@t*JVyR*PaJ&z2+aF#~y=XYZeRy{Oi^R(MNzA1QxzIl6LSQ`^=cr&+P= zb3Ge)Pv=_B9lsyM`Rms@=_i&r;t+yn!q~j)v#wK!j}SH?L7MDmPL@P56a9>Ys-8)u zY?e{IoCI5#lSI_@5S5o;EvCV;KVK^)(9}^*< z$nw72K~8J^#AMcPNP@C0oKxE0TEH8PS+Z9-GJ{EtzmXqz4nFDs2g<2}xOOK&Ve7 zCuwU8Ig*oA*b=p3tSUqj55^a6j%36h;eypzdIh-yf#y1fw* zv^U_^(=CtJ&swN86i~yXtB5t_YIu;@6BPF&LW~vX15?G1RG#QAu$*zcH%Qqz#5G8^#I@+1pnl?SY*IQji%FJ7+JO$(U42` z1Bx&q8aP;mZi)EXJnixjAyP(hcMXFQ!Ocl5U6_9a8xjcw7cms%z}HbRQj^h0*hPH- zfi?r-d)?$%!3%@(5A*_DCJ~%^YJPA8tD)p#F8z&%5EKPvslV_p1Oi&L9?$ELSmG#+ z+h*-WkX8{Xp-++uS7eLE$|3}ZQd=9bgT?}Z&fmBq0cJIcl(MUHouw0QL1!QG1xO!D z0D6B1aw4BsIhNE;l`gT8uS0l0?%)QxCaKO4t9i@xLsk<=7tqAC&bAIqz5dhf*{pAG z^_|e2)j{jl&}HlNjEnUM!BmIoc|glB#?ao87|Wp7Ns{N|!`j&6BIhw8V&eepgKDBE zASF5w_u%mJI-`Xn-4#`!Tw7OR3P;xy0#tv(r~Xcx^Dss@@L~2utRdN_=k%)iFk&UP zAVRS9{;tj)CpLyyT2ZKEZi$ZMC`}&`wCoe|tPCrSn!d^w$m;{X$(He_pb0|`H!UMr z{;>7cr(;!+;N zAvv1UM7m4We;%QlX*rI9>4_U^C}nEkhaosB(w$P*-$T|j@$?fI($ub@=x}?5 zd(Q}pVm4HRYypUtkzg^HSrn(zf%+7}7K2jtq<75$e+)_i?sYK_4aHUIGJz5q01wRi z%R^oT)h8h>sk2s+B&ug6A%R06Vy%BpwnxAS*iN$*po?4 ziBk%P1~v|9gHx9t`SdDW19K?;x>WJac|VN^nV0KG;R)Dxe9}+TD3xj;R)Bw2<;}h- zJk43{8QSL<7Nqo2mE5x{+WOYDR|347rE*v$5PUXWlsl3#tlOeAAdeXGo<5yg1`+*= zK<*2Q=vi@3&4@C#;J4@+dJ>C{o>jJ~iM5s?*2@I;by-qr;eKL^e_Vjc&_PzU3Kp~q~$>|S0Up@Z=iNStI&}aNk=O`LTh&i?FOMb@M`)u zP8)qHlr3+(OXB+=!{GH#drWG0gx&p}O=2Ym-OrcQ^LQR8YbofsSuza3RxC5cO^ds& zNe)Uz6G@(Se9pp*LTR52JVbG)2E;PswBfJ;*_$hj$BgTimKWWA5X9!d^1`|3sk4bwUvOnIwzy0Oa&+mi9KaX_WO2ZlqLT~4O zgo>ti_NCPt%{YRp(2)@x0TmG7zCa!5B87ZyhItr*2}nSU(i2Hq#?x%!<|Q&d((XY) zlccK8|M71L24vOE`M#`e{FOe&!NO%dhp_#wfXD~wkM@7o?l~IzP^{39b=kpy0n1>I z59{IVZj;lV`}w9sReZ6vy>W7YQylaQKj?>d`ugBY?8@ERRS>*u_p%BP`z4DZGqkP) z!EkdV3Q65JlyHyHYkmOIJ_rdqEf#kk=$bWXpU3uphtn4YmR3KM6+|i<&-^DU+@~N2 zixO6Ducv<)u#_4jo^-B?$GtBkAr2r-@|H~usYhgC($>OPP&<)PBK#_!`EhIrHs)3(uAx7B4<-3IXJx)fRf5w5Ke3IRue)E3?oh7qldRMO45vNRyn{3*ZoowRa zkq8Ewi_^2>j1Q)l2z=f6Itk~0u02g?fN9TH^gL2Aa!3}un@Zb%ow)s1uvpVr=#It_ zH1#m?s1~!sKP=kA*%PW<9uIKNf4;drAF z6K*W!Xx)5HPDdX{<81VHkGm2;Em$Pk1wk%c3KAt%2+bzsEZSe>e?O+@Xe2=a~~B334h$6J6%lMVtu9O_Y~0M)|zD^l_uztj{(#RDyc8Z3VryT~)(a zWBd2`Yj$#Q7$;0=jIy2OVmAK*64EzUZa{Bp9TaM}gi&sSI^Nr>Ss_^E zSf$AeX>los`%I&Jq|ek^mq~YU%l*^dQ!KW9E8T)*Ot$O=Ic#nNyE#XJz&>( zt(uGvuoH?}fBoVfLqXlixIymd_~`TQ$?=L@fcAF>{)d_rYKWSkM(eHA5UJ9p1J2hA z;H0R*>6OyZ`mk=#vu5k(3QBf`DPPu7)BM54+H{z^P;l1SLCdQAQ!J{Q*lirv)=fc& zZzU2KRc^^X<5tq%%);#iwRdc#WZpcpDr`nB!#E7V4HAM0@@<;4@ehLwQP!7%W&;zK z>Ba*UmmAgtHXa4Pl>+Y8+$VRP*uWi{1P<&3ZLaNyBnW7UcOoi@mv7bs7=N>d!tyM>wfRPScyFSMD*WdlLk3XZMngoOds$}IDb{_cGPK)v~ykk zmT|R^wMDLQxJ)o-6&#nW=hfg}joQbc$H>#Xj!9No-;JCsiIu{TU z@>0x5rnxwenTcCj0qWqMRtB^@*LOwNR3Fb73$B4-{--b&+_GHQIpfiz7mPJ$nfQMh 
z%Q<7n4qCc7<8Nc8aExa|5Y@+E*17sPWJBBUdHq=`FJr#pn12v1mWD$5*^@awlbKLy z*&>0@-3C}@kh;P9Sq3s*P$FyxQ(NbP8kcnEM1_`#Hk{?gUd-PR03wrvZF*uHS{@}j zkrWqt0w|kKnlgF#jA(RKDFsK#FH0uQq&lakm0(8eKSO`dwG<~Ac!V>;cn1@3X0Cy` zg+quj`M)_lfPX7!p{G~Djdd{b;sq@{eY-E(f4`=|D{6}M`n0F+Z<)@XuIWqc-@-qR ze?zA9E=nwMgP#`}z~Nu_Iz#&bV`U1WFwQSc-bsQw#z<8n8 z&0@=$SuRJPNQ=W}%KIkf!2bAt(>Jp`x&&c4;}GRH!oVFpiA7RN2*N+WLa$?>jH`_0 zmoM4_8Xpi1(aISL!d?5a+twb+Y+6R=<=QgIc>q4)z;|`Gg_mgB108>z_6O>JSb3Se zWpI3ItHWd19k$d#aPO4Y*w53ZK0R)CpJEuTfcU8&>l5Oy+#>G?IyR1JCwtYRGjU3J zT&1lKI!+1i_AkAwFU